{ "best_metric": null, "best_model_checkpoint": null, "epoch": 29.22077922077922, "global_step": 18000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.81, "learning_rate": 5e-05, "loss": 8.6952, "step": 500 }, { "epoch": 0.81, "eval_loss": 3.0705761909484863, "eval_runtime": 8.0191, "eval_samples_per_second": 25.813, "eval_steps_per_second": 3.242, "eval_wer": 1.1315192743764173, "step": 500 }, { "epoch": 1.62, "learning_rate": 0.0001, "loss": 3.2723, "step": 1000 }, { "epoch": 1.62, "eval_loss": 2.9638686180114746, "eval_runtime": 8.0684, "eval_samples_per_second": 25.656, "eval_steps_per_second": 3.222, "eval_wer": 1.0, "step": 1000 }, { "epoch": 2.44, "learning_rate": 9.713958810068651e-05, "loss": 3.2191, "step": 1500 }, { "epoch": 2.44, "eval_loss": 3.0145716667175293, "eval_runtime": 8.0711, "eval_samples_per_second": 25.647, "eval_steps_per_second": 3.221, "eval_wer": 1.0, "step": 1500 }, { "epoch": 3.25, "learning_rate": 9.4279176201373e-05, "loss": 3.0698, "step": 2000 }, { "epoch": 3.25, "eval_loss": 2.9972307682037354, "eval_runtime": 8.0326, "eval_samples_per_second": 25.77, "eval_steps_per_second": 3.237, "eval_wer": 1.3356009070294785, "step": 2000 }, { "epoch": 4.06, "learning_rate": 9.14187643020595e-05, "loss": 3.1918, "step": 2500 }, { "epoch": 4.06, "eval_loss": 2.9555039405822754, "eval_runtime": 8.0447, "eval_samples_per_second": 25.731, "eval_steps_per_second": 3.232, "eval_wer": 1.0, "step": 2500 }, { "epoch": 4.87, "learning_rate": 8.8558352402746e-05, "loss": 3.0932, "step": 3000 }, { "epoch": 4.87, "eval_loss": 2.9776482582092285, "eval_runtime": 8.053, "eval_samples_per_second": 25.705, "eval_steps_per_second": 3.229, "eval_wer": 1.0, "step": 3000 }, { "epoch": 5.68, "learning_rate": 8.569794050343249e-05, "loss": 3.2271, "step": 3500 }, { "epoch": 5.68, "eval_loss": 3.055955410003662, "eval_runtime": 8.1309, "eval_samples_per_second": 25.458, "eval_steps_per_second": 3.198, "eval_wer": 1.3378684807256236, "step": 3500 }, { "epoch": 6.49, "learning_rate": 8.283752860411899e-05, "loss": 3.2925, "step": 4000 }, { "epoch": 6.49, "eval_loss": 2.9724416732788086, "eval_runtime": 8.0677, "eval_samples_per_second": 25.658, "eval_steps_per_second": 3.223, "eval_wer": 1.3900226757369614, "step": 4000 }, { "epoch": 7.31, "learning_rate": 7.99771167048055e-05, "loss": 3.2195, "step": 4500 }, { "epoch": 7.31, "eval_loss": 3.5231425762176514, "eval_runtime": 8.1433, "eval_samples_per_second": 25.42, "eval_steps_per_second": 3.193, "eval_wer": 1.0, "step": 4500 }, { "epoch": 8.12, "learning_rate": 7.711670480549199e-05, "loss": 3.3582, "step": 5000 }, { "epoch": 8.12, "eval_loss": 3.749286413192749, "eval_runtime": 8.0117, "eval_samples_per_second": 25.837, "eval_steps_per_second": 3.245, "eval_wer": 1.0249433106575965, "step": 5000 }, { "epoch": 8.93, "learning_rate": 7.42562929061785e-05, "loss": 3.3233, "step": 5500 }, { "epoch": 8.93, "eval_loss": 3.3524105548858643, "eval_runtime": 8.2055, "eval_samples_per_second": 25.227, "eval_steps_per_second": 3.169, "eval_wer": 1.0, "step": 5500 }, { "epoch": 9.74, "learning_rate": 7.1395881006865e-05, "loss": 3.1679, "step": 6000 }, { "epoch": 9.74, "eval_loss": 3.186889410018921, "eval_runtime": 8.0182, "eval_samples_per_second": 25.816, "eval_steps_per_second": 3.243, "eval_wer": 1.3877551020408163, "step": 6000 }, { "epoch": 10.55, "learning_rate": 6.853546910755149e-05, "loss": 3.1393, "step": 6500 }, { "epoch": 10.55, "eval_loss": 3.0084877014160156, "eval_runtime": 8.0014, "eval_samples_per_second": 25.871, "eval_steps_per_second": 3.249, "eval_wer": 1.2947845804988662, "step": 6500 }, { "epoch": 11.36, "learning_rate": 6.5675057208238e-05, "loss": 3.1699, "step": 7000 }, { "epoch": 11.36, "eval_loss": 2.997206449508667, "eval_runtime": 8.1145, "eval_samples_per_second": 25.51, "eval_steps_per_second": 3.204, "eval_wer": 1.1337868480725624, "step": 7000 }, { "epoch": 12.18, "learning_rate": 6.281464530892449e-05, "loss": 3.3382, "step": 7500 }, { "epoch": 12.18, "eval_loss": 2.9686625003814697, "eval_runtime": 8.0023, "eval_samples_per_second": 25.868, "eval_steps_per_second": 3.249, "eval_wer": 1.3877551020408163, "step": 7500 }, { "epoch": 12.99, "learning_rate": 5.9954233409610984e-05, "loss": 3.0454, "step": 8000 }, { "epoch": 12.99, "eval_loss": 2.968928337097168, "eval_runtime": 8.2051, "eval_samples_per_second": 25.228, "eval_steps_per_second": 3.169, "eval_wer": 1.383219954648526, "step": 8000 }, { "epoch": 13.8, "learning_rate": 5.709382151029748e-05, "loss": 3.0609, "step": 8500 }, { "epoch": 13.8, "eval_loss": 2.902815580368042, "eval_runtime": 8.0338, "eval_samples_per_second": 25.766, "eval_steps_per_second": 3.236, "eval_wer": 1.3900226757369614, "step": 8500 }, { "epoch": 14.61, "learning_rate": 5.423340961098399e-05, "loss": 3.0224, "step": 9000 }, { "epoch": 14.61, "eval_loss": 2.9064831733703613, "eval_runtime": 8.2142, "eval_samples_per_second": 25.2, "eval_steps_per_second": 3.165, "eval_wer": 1.3877551020408163, "step": 9000 }, { "epoch": 15.42, "learning_rate": 5.137299771167048e-05, "loss": 3.0156, "step": 9500 }, { "epoch": 15.42, "eval_loss": 2.8854894638061523, "eval_runtime": 7.9943, "eval_samples_per_second": 25.893, "eval_steps_per_second": 3.252, "eval_wer": 1.3900226757369614, "step": 9500 }, { "epoch": 16.23, "learning_rate": 4.851258581235698e-05, "loss": 3.0317, "step": 10000 }, { "epoch": 16.23, "eval_loss": 2.8956820964813232, "eval_runtime": 8.1264, "eval_samples_per_second": 25.473, "eval_steps_per_second": 3.199, "eval_wer": 1.3900226757369614, "step": 10000 }, { "epoch": 17.05, "learning_rate": 4.565217391304348e-05, "loss": 3.0184, "step": 10500 }, { "epoch": 17.05, "eval_loss": 2.88946270942688, "eval_runtime": 8.0387, "eval_samples_per_second": 25.751, "eval_steps_per_second": 3.234, "eval_wer": 1.3900226757369614, "step": 10500 }, { "epoch": 17.86, "learning_rate": 4.279176201372998e-05, "loss": 3.0852, "step": 11000 }, { "epoch": 17.86, "eval_loss": 2.8936383724212646, "eval_runtime": 8.1682, "eval_samples_per_second": 25.342, "eval_steps_per_second": 3.183, "eval_wer": 1.3900226757369614, "step": 11000 }, { "epoch": 18.67, "learning_rate": 3.993135011441648e-05, "loss": 3.0017, "step": 11500 }, { "epoch": 18.67, "eval_loss": 2.8695127964019775, "eval_runtime": 8.169, "eval_samples_per_second": 25.34, "eval_steps_per_second": 3.183, "eval_wer": 1.3900226757369614, "step": 11500 }, { "epoch": 19.48, "learning_rate": 3.707093821510298e-05, "loss": 2.9337, "step": 12000 }, { "epoch": 19.48, "eval_loss": 2.8768134117126465, "eval_runtime": 8.1024, "eval_samples_per_second": 25.548, "eval_steps_per_second": 3.209, "eval_wer": 1.3900226757369614, "step": 12000 }, { "epoch": 20.29, "learning_rate": 3.421052631578947e-05, "loss": 3.0017, "step": 12500 }, { "epoch": 20.29, "eval_loss": 2.8580081462860107, "eval_runtime": 8.1811, "eval_samples_per_second": 25.302, "eval_steps_per_second": 3.178, "eval_wer": 1.3900226757369614, "step": 12500 }, { "epoch": 21.1, "learning_rate": 3.135011441647597e-05, "loss": 2.9472, "step": 13000 }, { "epoch": 21.1, "eval_loss": 2.839965343475342, "eval_runtime": 8.2839, "eval_samples_per_second": 24.988, "eval_steps_per_second": 3.139, "eval_wer": 1.3900226757369614, "step": 13000 }, { "epoch": 21.92, "learning_rate": 2.8489702517162476e-05, "loss": 3.0214, "step": 13500 }, { "epoch": 21.92, "eval_loss": 2.856123924255371, "eval_runtime": 8.1176, "eval_samples_per_second": 25.5, "eval_steps_per_second": 3.203, "eval_wer": 1.3900226757369614, "step": 13500 }, { "epoch": 22.73, "learning_rate": 2.562929061784897e-05, "loss": 2.9336, "step": 14000 }, { "epoch": 22.73, "eval_loss": 2.879206895828247, "eval_runtime": 8.1391, "eval_samples_per_second": 25.433, "eval_steps_per_second": 3.194, "eval_wer": 1.3900226757369614, "step": 14000 }, { "epoch": 23.54, "learning_rate": 2.276887871853547e-05, "loss": 3.0134, "step": 14500 }, { "epoch": 23.54, "eval_loss": 2.8472487926483154, "eval_runtime": 8.0887, "eval_samples_per_second": 25.591, "eval_steps_per_second": 3.214, "eval_wer": 1.3900226757369614, "step": 14500 }, { "epoch": 24.35, "learning_rate": 1.990846681922197e-05, "loss": 2.9433, "step": 15000 }, { "epoch": 24.35, "eval_loss": 2.8818936347961426, "eval_runtime": 8.193, "eval_samples_per_second": 25.266, "eval_steps_per_second": 3.173, "eval_wer": 1.3900226757369614, "step": 15000 }, { "epoch": 25.16, "learning_rate": 1.7048054919908468e-05, "loss": 2.8536, "step": 15500 }, { "epoch": 25.16, "eval_loss": 2.837463140487671, "eval_runtime": 8.1663, "eval_samples_per_second": 25.348, "eval_steps_per_second": 3.184, "eval_wer": 1.3900226757369614, "step": 15500 }, { "epoch": 25.97, "learning_rate": 1.4187643020594965e-05, "loss": 2.8742, "step": 16000 }, { "epoch": 25.97, "eval_loss": 2.857389450073242, "eval_runtime": 8.194, "eval_samples_per_second": 25.262, "eval_steps_per_second": 3.173, "eval_wer": 1.3900226757369614, "step": 16000 }, { "epoch": 26.79, "learning_rate": 1.1327231121281464e-05, "loss": 2.8298, "step": 16500 }, { "epoch": 26.79, "eval_loss": 2.982081651687622, "eval_runtime": 8.1935, "eval_samples_per_second": 25.264, "eval_steps_per_second": 3.173, "eval_wer": 1.3900226757369614, "step": 16500 }, { "epoch": 27.6, "learning_rate": 8.466819221967964e-06, "loss": 2.7439, "step": 17000 }, { "epoch": 27.6, "eval_loss": 3.2741587162017822, "eval_runtime": 8.2267, "eval_samples_per_second": 25.162, "eval_steps_per_second": 3.16, "eval_wer": 1.3900226757369614, "step": 17000 }, { "epoch": 28.41, "learning_rate": 5.606407322654463e-06, "loss": 2.7008, "step": 17500 }, { "epoch": 28.41, "eval_loss": 3.365966320037842, "eval_runtime": 8.203, "eval_samples_per_second": 25.235, "eval_steps_per_second": 3.17, "eval_wer": 1.3900226757369614, "step": 17500 }, { "epoch": 29.22, "learning_rate": 2.745995423340961e-06, "loss": 2.7087, "step": 18000 }, { "epoch": 29.22, "eval_loss": 3.812533140182495, "eval_runtime": 8.2382, "eval_samples_per_second": 25.127, "eval_steps_per_second": 3.156, "eval_wer": 1.3900226757369614, "step": 18000 } ], "max_steps": 18480, "num_train_epochs": 30, "total_flos": 4.093361821503758e+18, "trial_name": null, "trial_params": null }