{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 29.22077922077922,
  "global_step": 18000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.81,
      "learning_rate": 5e-05,
      "loss": 8.6952,
      "step": 500
    },
    {
      "epoch": 0.81,
      "eval_loss": 3.0705761909484863,
      "eval_runtime": 8.0191,
      "eval_samples_per_second": 25.813,
      "eval_steps_per_second": 3.242,
      "eval_wer": 1.1315192743764173,
      "step": 500
    },
    {
      "epoch": 1.62,
      "learning_rate": 0.0001,
      "loss": 3.2723,
      "step": 1000
    },
    {
      "epoch": 1.62,
      "eval_loss": 2.9638686180114746,
      "eval_runtime": 8.0684,
      "eval_samples_per_second": 25.656,
      "eval_steps_per_second": 3.222,
      "eval_wer": 1.0,
      "step": 1000
    },
    {
      "epoch": 2.44,
      "learning_rate": 9.713958810068651e-05,
      "loss": 3.2191,
      "step": 1500
    },
    {
      "epoch": 2.44,
      "eval_loss": 3.0145716667175293,
      "eval_runtime": 8.0711,
      "eval_samples_per_second": 25.647,
      "eval_steps_per_second": 3.221,
      "eval_wer": 1.0,
      "step": 1500
    },
    {
      "epoch": 3.25,
      "learning_rate": 9.4279176201373e-05,
      "loss": 3.0698,
      "step": 2000
    },
    {
      "epoch": 3.25,
      "eval_loss": 2.9972307682037354,
      "eval_runtime": 8.0326,
      "eval_samples_per_second": 25.77,
      "eval_steps_per_second": 3.237,
      "eval_wer": 1.3356009070294785,
      "step": 2000
    },
    {
      "epoch": 4.06,
      "learning_rate": 9.14187643020595e-05,
      "loss": 3.1918,
      "step": 2500
    },
    {
      "epoch": 4.06,
      "eval_loss": 2.9555039405822754,
      "eval_runtime": 8.0447,
      "eval_samples_per_second": 25.731,
      "eval_steps_per_second": 3.232,
      "eval_wer": 1.0,
      "step": 2500
    },
    {
      "epoch": 4.87,
      "learning_rate": 8.8558352402746e-05,
      "loss": 3.0932,
      "step": 3000
    },
    {
      "epoch": 4.87,
      "eval_loss": 2.9776482582092285,
      "eval_runtime": 8.053,
      "eval_samples_per_second": 25.705,
      "eval_steps_per_second": 3.229,
      "eval_wer": 1.0,
      "step": 3000
    },
    {
      "epoch": 5.68,
      "learning_rate": 8.569794050343249e-05,
      "loss": 3.2271,
      "step": 3500
    },
    {
      "epoch": 5.68,
      "eval_loss": 3.055955410003662,
      "eval_runtime": 8.1309,
      "eval_samples_per_second": 25.458,
      "eval_steps_per_second": 3.198,
      "eval_wer": 1.3378684807256236,
      "step": 3500
    },
    {
      "epoch": 6.49,
      "learning_rate": 8.283752860411899e-05,
      "loss": 3.2925,
      "step": 4000
    },
    {
      "epoch": 6.49,
      "eval_loss": 2.9724416732788086,
      "eval_runtime": 8.0677,
      "eval_samples_per_second": 25.658,
      "eval_steps_per_second": 3.223,
      "eval_wer": 1.3900226757369614,
      "step": 4000
    },
    {
      "epoch": 7.31,
      "learning_rate": 7.99771167048055e-05,
      "loss": 3.2195,
      "step": 4500
    },
    {
      "epoch": 7.31,
      "eval_loss": 3.5231425762176514,
      "eval_runtime": 8.1433,
      "eval_samples_per_second": 25.42,
      "eval_steps_per_second": 3.193,
      "eval_wer": 1.0,
      "step": 4500
    },
    {
      "epoch": 8.12,
      "learning_rate": 7.711670480549199e-05,
      "loss": 3.3582,
      "step": 5000
    },
    {
      "epoch": 8.12,
      "eval_loss": 3.749286413192749,
      "eval_runtime": 8.0117,
      "eval_samples_per_second": 25.837,
      "eval_steps_per_second": 3.245,
      "eval_wer": 1.0249433106575965,
      "step": 5000
    },
    {
      "epoch": 8.93,
      "learning_rate": 7.42562929061785e-05,
      "loss": 3.3233,
      "step": 5500
    },
    {
      "epoch": 8.93,
      "eval_loss": 3.3524105548858643,
      "eval_runtime": 8.2055,
      "eval_samples_per_second": 25.227,
      "eval_steps_per_second": 3.169,
      "eval_wer": 1.0,
      "step": 5500
    },
    {
      "epoch": 9.74,
      "learning_rate": 7.1395881006865e-05,
      "loss": 3.1679,
      "step": 6000
    },
    {
      "epoch": 9.74,
      "eval_loss": 3.186889410018921,
      "eval_runtime": 8.0182,
      "eval_samples_per_second": 25.816,
      "eval_steps_per_second": 3.243,
      "eval_wer": 1.3877551020408163,
      "step": 6000
    },
    {
      "epoch": 10.55,
      "learning_rate": 6.853546910755149e-05,
      "loss": 3.1393,
      "step": 6500
    },
    {
      "epoch": 10.55,
      "eval_loss": 3.0084877014160156,
      "eval_runtime": 8.0014,
      "eval_samples_per_second": 25.871,
      "eval_steps_per_second": 3.249,
      "eval_wer": 1.2947845804988662,
      "step": 6500
    },
    {
      "epoch": 11.36,
      "learning_rate": 6.5675057208238e-05,
      "loss": 3.1699,
      "step": 7000
    },
    {
      "epoch": 11.36,
      "eval_loss": 2.997206449508667,
      "eval_runtime": 8.1145,
      "eval_samples_per_second": 25.51,
      "eval_steps_per_second": 3.204,
      "eval_wer": 1.1337868480725624,
      "step": 7000
    },
    {
      "epoch": 12.18,
      "learning_rate": 6.281464530892449e-05,
      "loss": 3.3382,
      "step": 7500
    },
    {
      "epoch": 12.18,
      "eval_loss": 2.9686625003814697,
      "eval_runtime": 8.0023,
      "eval_samples_per_second": 25.868,
      "eval_steps_per_second": 3.249,
      "eval_wer": 1.3877551020408163,
      "step": 7500
    },
    {
      "epoch": 12.99,
      "learning_rate": 5.9954233409610984e-05,
      "loss": 3.0454,
      "step": 8000
    },
    {
      "epoch": 12.99,
      "eval_loss": 2.968928337097168,
      "eval_runtime": 8.2051,
      "eval_samples_per_second": 25.228,
      "eval_steps_per_second": 3.169,
      "eval_wer": 1.383219954648526,
      "step": 8000
    },
    {
      "epoch": 13.8,
      "learning_rate": 5.709382151029748e-05,
      "loss": 3.0609,
      "step": 8500
    },
    {
      "epoch": 13.8,
      "eval_loss": 2.902815580368042,
      "eval_runtime": 8.0338,
      "eval_samples_per_second": 25.766,
      "eval_steps_per_second": 3.236,
      "eval_wer": 1.3900226757369614,
      "step": 8500
    },
    {
      "epoch": 14.61,
      "learning_rate": 5.423340961098399e-05,
      "loss": 3.0224,
      "step": 9000
    },
    {
      "epoch": 14.61,
      "eval_loss": 2.9064831733703613,
      "eval_runtime": 8.2142,
      "eval_samples_per_second": 25.2,
      "eval_steps_per_second": 3.165,
      "eval_wer": 1.3877551020408163,
      "step": 9000
    },
    {
      "epoch": 15.42,
      "learning_rate": 5.137299771167048e-05,
      "loss": 3.0156,
      "step": 9500
    },
    {
      "epoch": 15.42,
      "eval_loss": 2.8854894638061523,
      "eval_runtime": 7.9943,
      "eval_samples_per_second": 25.893,
      "eval_steps_per_second": 3.252,
      "eval_wer": 1.3900226757369614,
      "step": 9500
    },
    {
      "epoch": 16.23,
      "learning_rate": 4.851258581235698e-05,
      "loss": 3.0317,
      "step": 10000
    },
    {
      "epoch": 16.23,
      "eval_loss": 2.8956820964813232,
      "eval_runtime": 8.1264,
      "eval_samples_per_second": 25.473,
      "eval_steps_per_second": 3.199,
      "eval_wer": 1.3900226757369614,
      "step": 10000
    },
    {
      "epoch": 17.05,
      "learning_rate": 4.565217391304348e-05,
      "loss": 3.0184,
      "step": 10500
    },
    {
      "epoch": 17.05,
      "eval_loss": 2.88946270942688,
      "eval_runtime": 8.0387,
      "eval_samples_per_second": 25.751,
      "eval_steps_per_second": 3.234,
      "eval_wer": 1.3900226757369614,
      "step": 10500
    },
    {
      "epoch": 17.86,
      "learning_rate": 4.279176201372998e-05,
      "loss": 3.0852,
      "step": 11000
    },
    {
      "epoch": 17.86,
      "eval_loss": 2.8936383724212646,
      "eval_runtime": 8.1682,
      "eval_samples_per_second": 25.342,
      "eval_steps_per_second": 3.183,
      "eval_wer": 1.3900226757369614,
      "step": 11000
    },
    {
      "epoch": 18.67,
      "learning_rate": 3.993135011441648e-05,
      "loss": 3.0017,
      "step": 11500
    },
    {
      "epoch": 18.67,
      "eval_loss": 2.8695127964019775,
      "eval_runtime": 8.169,
      "eval_samples_per_second": 25.34,
      "eval_steps_per_second": 3.183,
      "eval_wer": 1.3900226757369614,
      "step": 11500
    },
    {
      "epoch": 19.48,
      "learning_rate": 3.707093821510298e-05,
      "loss": 2.9337,
      "step": 12000
    },
    {
      "epoch": 19.48,
      "eval_loss": 2.8768134117126465,
      "eval_runtime": 8.1024,
      "eval_samples_per_second": 25.548,
      "eval_steps_per_second": 3.209,
      "eval_wer": 1.3900226757369614,
      "step": 12000
    },
    {
      "epoch": 20.29,
      "learning_rate": 3.421052631578947e-05,
      "loss": 3.0017,
      "step": 12500
    },
    {
      "epoch": 20.29,
      "eval_loss": 2.8580081462860107,
      "eval_runtime": 8.1811,
      "eval_samples_per_second": 25.302,
      "eval_steps_per_second": 3.178,
      "eval_wer": 1.3900226757369614,
      "step": 12500
    },
    {
      "epoch": 21.1,
      "learning_rate": 3.135011441647597e-05,
      "loss": 2.9472,
      "step": 13000
    },
    {
      "epoch": 21.1,
      "eval_loss": 2.839965343475342,
      "eval_runtime": 8.2839,
      "eval_samples_per_second": 24.988,
      "eval_steps_per_second": 3.139,
      "eval_wer": 1.3900226757369614,
      "step": 13000
    },
    {
      "epoch": 21.92,
      "learning_rate": 2.8489702517162476e-05,
      "loss": 3.0214,
      "step": 13500
    },
    {
      "epoch": 21.92,
      "eval_loss": 2.856123924255371,
      "eval_runtime": 8.1176,
      "eval_samples_per_second": 25.5,
      "eval_steps_per_second": 3.203,
      "eval_wer": 1.3900226757369614,
      "step": 13500
    },
    {
      "epoch": 22.73,
      "learning_rate": 2.562929061784897e-05,
      "loss": 2.9336,
      "step": 14000
    },
    {
      "epoch": 22.73,
      "eval_loss": 2.879206895828247,
      "eval_runtime": 8.1391,
      "eval_samples_per_second": 25.433,
      "eval_steps_per_second": 3.194,
      "eval_wer": 1.3900226757369614,
      "step": 14000
    },
    {
      "epoch": 23.54,
      "learning_rate": 2.276887871853547e-05,
      "loss": 3.0134,
      "step": 14500
    },
    {
      "epoch": 23.54,
      "eval_loss": 2.8472487926483154,
      "eval_runtime": 8.0887,
      "eval_samples_per_second": 25.591,
      "eval_steps_per_second": 3.214,
      "eval_wer": 1.3900226757369614,
      "step": 14500
    },
    {
      "epoch": 24.35,
      "learning_rate": 1.990846681922197e-05,
      "loss": 2.9433,
      "step": 15000
    },
    {
      "epoch": 24.35,
      "eval_loss": 2.8818936347961426,
      "eval_runtime": 8.193,
      "eval_samples_per_second": 25.266,
      "eval_steps_per_second": 3.173,
      "eval_wer": 1.3900226757369614,
      "step": 15000
    },
    {
      "epoch": 25.16,
      "learning_rate": 1.7048054919908468e-05,
      "loss": 2.8536,
      "step": 15500
    },
    {
      "epoch": 25.16,
      "eval_loss": 2.837463140487671,
      "eval_runtime": 8.1663,
      "eval_samples_per_second": 25.348,
      "eval_steps_per_second": 3.184,
      "eval_wer": 1.3900226757369614,
      "step": 15500
    },
    {
      "epoch": 25.97,
      "learning_rate": 1.4187643020594965e-05,
      "loss": 2.8742,
      "step": 16000
    },
    {
      "epoch": 25.97,
      "eval_loss": 2.857389450073242,
      "eval_runtime": 8.194,
      "eval_samples_per_second": 25.262,
      "eval_steps_per_second": 3.173,
      "eval_wer": 1.3900226757369614,
      "step": 16000
    },
    {
      "epoch": 26.79,
      "learning_rate": 1.1327231121281464e-05,
      "loss": 2.8298,
      "step": 16500
    },
    {
      "epoch": 26.79,
      "eval_loss": 2.982081651687622,
      "eval_runtime": 8.1935,
      "eval_samples_per_second": 25.264,
      "eval_steps_per_second": 3.173,
      "eval_wer": 1.3900226757369614,
      "step": 16500
    },
    {
      "epoch": 27.6,
      "learning_rate": 8.466819221967964e-06,
      "loss": 2.7439,
      "step": 17000
    },
    {
      "epoch": 27.6,
      "eval_loss": 3.2741587162017822,
      "eval_runtime": 8.2267,
      "eval_samples_per_second": 25.162,
      "eval_steps_per_second": 3.16,
      "eval_wer": 1.3900226757369614,
      "step": 17000
    },
    {
      "epoch": 28.41,
      "learning_rate": 5.606407322654463e-06,
      "loss": 2.7008,
      "step": 17500
    },
    {
      "epoch": 28.41,
      "eval_loss": 3.365966320037842,
      "eval_runtime": 8.203,
      "eval_samples_per_second": 25.235,
      "eval_steps_per_second": 3.17,
      "eval_wer": 1.3900226757369614,
      "step": 17500
    },
    {
      "epoch": 29.22,
      "learning_rate": 2.745995423340961e-06,
      "loss": 2.7087,
      "step": 18000
    },
    {
      "epoch": 29.22,
      "eval_loss": 3.812533140182495,
      "eval_runtime": 8.2382,
      "eval_samples_per_second": 25.127,
      "eval_steps_per_second": 3.156,
      "eval_wer": 1.3900226757369614,
      "step": 18000
    }
  ],
  "max_steps": 18480,
  "num_train_epochs": 30,
  "total_flos": 4.093361821503758e+18,
  "trial_name": null,
  "trial_params": null
}