| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 4.0, |
| "eval_steps": 200, |
| "global_step": 8312, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.1, |
| "eval_loss": 3.2670648097991943, |
| "eval_runtime": 692.0326, |
| "eval_samples_per_second": 5.263, |
| "eval_steps_per_second": 0.659, |
| "eval_wer": 1.0, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.19, |
| "eval_loss": 2.8740603923797607, |
| "eval_runtime": 683.8046, |
| "eval_samples_per_second": 5.326, |
| "eval_steps_per_second": 0.667, |
| "eval_wer": 1.0006963141769567, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 0.0009517778860204579, |
| "loss": 3.8381, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.29, |
| "eval_loss": 2.761221170425415, |
| "eval_runtime": 683.642, |
| "eval_samples_per_second": 5.327, |
| "eval_steps_per_second": 0.667, |
| "eval_wer": 0.9954507473772166, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.38, |
| "eval_loss": 2.633348226547241, |
| "eval_runtime": 684.1815, |
| "eval_samples_per_second": 5.323, |
| "eval_steps_per_second": 0.666, |
| "eval_wer": 0.9981431621947823, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 0.000890891378470531, |
| "loss": 2.6996, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.48, |
| "eval_loss": 2.3073549270629883, |
| "eval_runtime": 686.3923, |
| "eval_samples_per_second": 5.306, |
| "eval_steps_per_second": 0.664, |
| "eval_wer": 0.9770680531055612, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.58, |
| "eval_loss": 2.0154612064361572, |
| "eval_runtime": 686.5478, |
| "eval_samples_per_second": 5.305, |
| "eval_steps_per_second": 0.664, |
| "eval_wer": 0.9286045863893789, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.67, |
| "eval_loss": 1.9155136346817017, |
| "eval_runtime": 689.547, |
| "eval_samples_per_second": 5.282, |
| "eval_steps_per_second": 0.661, |
| "eval_wer": 0.8947172964441557, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 0.000830004870920604, |
| "loss": 2.2919, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.77, |
| "eval_loss": 1.641204595565796, |
| "eval_runtime": 685.3748, |
| "eval_samples_per_second": 5.314, |
| "eval_steps_per_second": 0.665, |
| "eval_wer": 0.8813944851917185, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.87, |
| "eval_loss": 1.4531193971633911, |
| "eval_runtime": 689.2035, |
| "eval_samples_per_second": 5.284, |
| "eval_steps_per_second": 0.662, |
| "eval_wer": 0.8285210286881441, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 0.0007691183633706771, |
| "loss": 1.5872, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.96, |
| "eval_loss": 0.1812867820262909, |
| "eval_runtime": 685.9058, |
| "eval_samples_per_second": 5.31, |
| "eval_steps_per_second": 0.665, |
| "eval_wer": 0.2060161544889054, |
| "step": 2000 |
| }, |
| { |
| "epoch": 1.06, |
| "eval_loss": 0.1635832041501999, |
| "eval_runtime": 687.9409, |
| "eval_samples_per_second": 5.294, |
| "eval_steps_per_second": 0.663, |
| "eval_wer": 0.18062389750255314, |
| "step": 2200 |
| }, |
| { |
| "epoch": 1.15, |
| "eval_loss": 0.155806764960289, |
| "eval_runtime": 692.1735, |
| "eval_samples_per_second": 5.262, |
| "eval_steps_per_second": 0.659, |
| "eval_wer": 0.17444991180020425, |
| "step": 2400 |
| }, |
| { |
| "epoch": 1.2, |
| "learning_rate": 0.0007084754018509498, |
| "loss": 0.2659, |
| "step": 2500 |
| }, |
| { |
| "epoch": 1.25, |
| "eval_loss": 0.152183398604393, |
| "eval_runtime": 688.556, |
| "eval_samples_per_second": 5.289, |
| "eval_steps_per_second": 0.662, |
| "eval_wer": 0.1646550923776808, |
| "step": 2600 |
| }, |
| { |
| "epoch": 1.35, |
| "eval_loss": 0.15532232820987701, |
| "eval_runtime": 688.1144, |
| "eval_samples_per_second": 5.293, |
| "eval_steps_per_second": 0.663, |
| "eval_wer": 0.16641908829263763, |
| "step": 2800 |
| }, |
| { |
| "epoch": 1.44, |
| "learning_rate": 0.0006475888943010228, |
| "loss": 0.2436, |
| "step": 3000 |
| }, |
| { |
| "epoch": 1.44, |
| "eval_loss": 0.1840931922197342, |
| "eval_runtime": 692.811, |
| "eval_samples_per_second": 5.257, |
| "eval_steps_per_second": 0.658, |
| "eval_wer": 0.1960820722309906, |
| "step": 3000 |
| }, |
| { |
| "epoch": 1.54, |
| "eval_loss": 0.14190182089805603, |
| "eval_runtime": 690.3365, |
| "eval_samples_per_second": 5.276, |
| "eval_steps_per_second": 0.661, |
| "eval_wer": 0.1640051991458546, |
| "step": 3200 |
| }, |
| { |
| "epoch": 1.64, |
| "eval_loss": 0.14559713006019592, |
| "eval_runtime": 685.0999, |
| "eval_samples_per_second": 5.316, |
| "eval_steps_per_second": 0.666, |
| "eval_wer": 0.17143255036672547, |
| "step": 3400 |
| }, |
| { |
| "epoch": 1.68, |
| "learning_rate": 0.000586702386751096, |
| "loss": 0.2464, |
| "step": 3500 |
| }, |
| { |
| "epoch": 1.73, |
| "eval_loss": 0.14024095237255096, |
| "eval_runtime": 692.5402, |
| "eval_samples_per_second": 5.259, |
| "eval_steps_per_second": 0.658, |
| "eval_wer": 0.16070931204159317, |
| "step": 3600 |
| }, |
| { |
| "epoch": 1.83, |
| "eval_loss": 0.1345185786485672, |
| "eval_runtime": 694.4502, |
| "eval_samples_per_second": 5.244, |
| "eval_steps_per_second": 0.657, |
| "eval_wer": 0.1528177513694179, |
| "step": 3800 |
| }, |
| { |
| "epoch": 1.92, |
| "learning_rate": 0.000525815879201169, |
| "loss": 0.2292, |
| "step": 4000 |
| }, |
| { |
| "epoch": 1.92, |
| "eval_loss": 0.134234219789505, |
| "eval_runtime": 693.8578, |
| "eval_samples_per_second": 5.249, |
| "eval_steps_per_second": 0.657, |
| "eval_wer": 0.155556587132114, |
| "step": 4000 |
| }, |
| { |
| "epoch": 2.02, |
| "eval_loss": 0.13340923190116882, |
| "eval_runtime": 684.0209, |
| "eval_samples_per_second": 5.324, |
| "eval_steps_per_second": 0.667, |
| "eval_wer": 0.15518521957107045, |
| "step": 4200 |
| }, |
| { |
| "epoch": 2.12, |
| "eval_loss": 0.13518257439136505, |
| "eval_runtime": 687.2622, |
| "eval_samples_per_second": 5.299, |
| "eval_steps_per_second": 0.664, |
| "eval_wer": 0.1543496425587225, |
| "step": 4400 |
| }, |
| { |
| "epoch": 2.17, |
| "learning_rate": 0.0004649293716512421, |
| "loss": 0.2209, |
| "step": 4500 |
| }, |
| { |
| "epoch": 2.21, |
| "eval_loss": 0.13499902188777924, |
| "eval_runtime": 691.8679, |
| "eval_samples_per_second": 5.264, |
| "eval_steps_per_second": 0.659, |
| "eval_wer": 0.1537925912171572, |
| "step": 4600 |
| }, |
| { |
| "epoch": 2.31, |
| "eval_loss": 0.13418444991111755, |
| "eval_runtime": 685.3615, |
| "eval_samples_per_second": 5.314, |
| "eval_steps_per_second": 0.665, |
| "eval_wer": 0.1530498560950701, |
| "step": 4800 |
| }, |
| { |
| "epoch": 2.41, |
| "learning_rate": 0.00040404286410131515, |
| "loss": 0.2136, |
| "step": 5000 |
| }, |
| { |
| "epoch": 2.41, |
| "eval_loss": 0.1319747269153595, |
| "eval_runtime": 688.5799, |
| "eval_samples_per_second": 5.289, |
| "eval_steps_per_second": 0.662, |
| "eval_wer": 0.1540246959428094, |
| "step": 5000 |
| }, |
| { |
| "epoch": 2.5, |
| "eval_loss": 0.13689081370830536, |
| "eval_runtime": 691.8314, |
| "eval_samples_per_second": 5.264, |
| "eval_steps_per_second": 0.659, |
| "eval_wer": 0.15690279454089684, |
| "step": 5200 |
| }, |
| { |
| "epoch": 2.6, |
| "eval_loss": 0.13139554858207703, |
| "eval_runtime": 689.159, |
| "eval_samples_per_second": 5.285, |
| "eval_steps_per_second": 0.662, |
| "eval_wer": 0.1516572277411568, |
| "step": 5400 |
| }, |
| { |
| "epoch": 2.65, |
| "learning_rate": 0.0003431563565513882, |
| "loss": 0.2154, |
| "step": 5500 |
| }, |
| { |
| "epoch": 2.69, |
| "eval_loss": 0.1303856372833252, |
| "eval_runtime": 694.7157, |
| "eval_samples_per_second": 5.242, |
| "eval_steps_per_second": 0.656, |
| "eval_wer": 0.15063596694828707, |
| "step": 5600 |
| }, |
| { |
| "epoch": 2.79, |
| "eval_loss": 0.13201411068439484, |
| "eval_runtime": 691.5101, |
| "eval_samples_per_second": 5.267, |
| "eval_steps_per_second": 0.659, |
| "eval_wer": 0.15072880883854795, |
| "step": 5800 |
| }, |
| { |
| "epoch": 2.89, |
| "learning_rate": 0.0002822698490014613, |
| "loss": 0.2123, |
| "step": 6000 |
| }, |
| { |
| "epoch": 2.89, |
| "eval_loss": 0.13187964260578156, |
| "eval_runtime": 687.2712, |
| "eval_samples_per_second": 5.299, |
| "eval_steps_per_second": 0.663, |
| "eval_wer": 0.1523999628632439, |
| "step": 6000 |
| }, |
| { |
| "epoch": 2.98, |
| "eval_loss": 0.12917861342430115, |
| "eval_runtime": 691.2948, |
| "eval_samples_per_second": 5.268, |
| "eval_steps_per_second": 0.66, |
| "eval_wer": 0.1523999628632439, |
| "step": 6200 |
| }, |
| { |
| "epoch": 3.08, |
| "eval_loss": 0.12825024127960205, |
| "eval_runtime": 689.7813, |
| "eval_samples_per_second": 5.28, |
| "eval_steps_per_second": 0.661, |
| "eval_wer": 0.1488255500881998, |
| "step": 6400 |
| }, |
| { |
| "epoch": 3.13, |
| "learning_rate": 0.00022138334145153436, |
| "loss": 0.2109, |
| "step": 6500 |
| }, |
| { |
| "epoch": 3.18, |
| "eval_loss": 0.1257564276456833, |
| "eval_runtime": 687.694, |
| "eval_samples_per_second": 5.296, |
| "eval_steps_per_second": 0.663, |
| "eval_wer": 0.14919691764924334, |
| "step": 6600 |
| }, |
| { |
| "epoch": 3.27, |
| "eval_loss": 0.12906372547149658, |
| "eval_runtime": 687.8093, |
| "eval_samples_per_second": 5.295, |
| "eval_steps_per_second": 0.663, |
| "eval_wer": 0.1488255500881998, |
| "step": 6800 |
| }, |
| { |
| "epoch": 3.37, |
| "learning_rate": 0.0001604968339016074, |
| "loss": 0.2103, |
| "step": 7000 |
| }, |
| { |
| "epoch": 3.37, |
| "eval_loss": 0.12778830528259277, |
| "eval_runtime": 694.3257, |
| "eval_samples_per_second": 5.245, |
| "eval_steps_per_second": 0.657, |
| "eval_wer": 0.14840776158202582, |
| "step": 7000 |
| }, |
| { |
| "epoch": 3.46, |
| "eval_loss": 0.12501177191734314, |
| "eval_runtime": 679.1124, |
| "eval_samples_per_second": 5.363, |
| "eval_steps_per_second": 0.671, |
| "eval_wer": 0.14780428929533004, |
| "step": 7200 |
| }, |
| { |
| "epoch": 3.56, |
| "eval_loss": 0.12769711017608643, |
| "eval_runtime": 683.2755, |
| "eval_samples_per_second": 5.33, |
| "eval_steps_per_second": 0.667, |
| "eval_wer": 0.14822207780150404, |
| "step": 7400 |
| }, |
| { |
| "epoch": 3.61, |
| "learning_rate": 9.961032635168047e-05, |
| "loss": 0.1986, |
| "step": 7500 |
| }, |
| { |
| "epoch": 3.66, |
| "eval_loss": 0.1256353259086609, |
| "eval_runtime": 680.6384, |
| "eval_samples_per_second": 5.351, |
| "eval_steps_per_second": 0.67, |
| "eval_wer": 0.14757218456967783, |
| "step": 7600 |
| }, |
| { |
| "epoch": 3.75, |
| "eval_loss": 0.12579868733882904, |
| "eval_runtime": 683.2757, |
| "eval_samples_per_second": 5.33, |
| "eval_steps_per_second": 0.667, |
| "eval_wer": 0.14682944944759074, |
| "step": 7800 |
| }, |
| { |
| "epoch": 3.85, |
| "learning_rate": 3.884559181685338e-05, |
| "loss": 0.1954, |
| "step": 8000 |
| }, |
| { |
| "epoch": 3.85, |
| "eval_loss": 0.12557055056095123, |
| "eval_runtime": 690.701, |
| "eval_samples_per_second": 5.273, |
| "eval_steps_per_second": 0.66, |
| "eval_wer": 0.14645808188654721, |
| "step": 8000 |
| }, |
| { |
| "epoch": 3.95, |
| "eval_loss": 0.12530682981014252, |
| "eval_runtime": 692.3328, |
| "eval_samples_per_second": 5.26, |
| "eval_steps_per_second": 0.659, |
| "eval_wer": 0.1455760839290688, |
| "step": 8200 |
| }, |
| { |
| "epoch": 4.0, |
| "step": 8312, |
| "total_flos": 1.5580571693960135e+19, |
| "train_loss": 0.7944976037459608, |
| "train_runtime": 46739.0877, |
| "train_samples_per_second": 0.711, |
| "train_steps_per_second": 0.178 |
| } |
| ], |
| "logging_steps": 500, |
| "max_steps": 8312, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 4, |
| "save_steps": 200, |
| "total_flos": 1.5580571693960135e+19, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|