| { | |
| "best_metric": 0.3922309875488281, | |
| "best_model_checkpoint": "/data/users/yanyang/Projects/COCO_Caption_Refine/debug/git/2023-09-12-11-13-17_git-base/checkpoint-4800", | |
| "epoch": 2.9702970297029703, | |
| "global_step": 4800, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.948432343234324e-05, | |
| "loss": 6.7185, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "eval_loss": 4.150215148925781, | |
| "eval_runtime": 2.7394, | |
| "eval_samples_per_second": 11.682, | |
| "eval_steps_per_second": 5.841, | |
| "eval_wer_score": 2.6076555023923444, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.8968646864686466e-05, | |
| "loss": 2.2563, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "eval_loss": 0.7511033415794373, | |
| "eval_runtime": 2.511, | |
| "eval_samples_per_second": 12.744, | |
| "eval_steps_per_second": 6.372, | |
| "eval_wer_score": 1.8782079164854284, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.8452970297029704e-05, | |
| "loss": 0.6084, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "eval_loss": 0.5207427144050598, | |
| "eval_runtime": 3.1225, | |
| "eval_samples_per_second": 10.248, | |
| "eval_steps_per_second": 5.124, | |
| "eval_wer_score": 2.3458025228360158, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.793729372937294e-05, | |
| "loss": 0.5236, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "eval_loss": 0.48739808797836304, | |
| "eval_runtime": 3.4202, | |
| "eval_samples_per_second": 9.356, | |
| "eval_steps_per_second": 4.678, | |
| "eval_wer_score": 2.6250543714658545, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.7421617161716174e-05, | |
| "loss": 0.5022, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "eval_loss": 0.4747964143753052, | |
| "eval_runtime": 3.5933, | |
| "eval_samples_per_second": 8.906, | |
| "eval_steps_per_second": 4.453, | |
| "eval_wer_score": 2.889952153110048, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.6905940594059406e-05, | |
| "loss": 0.489, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "eval_loss": 0.4658946096897125, | |
| "eval_runtime": 3.2552, | |
| "eval_samples_per_second": 9.83, | |
| "eval_steps_per_second": 4.915, | |
| "eval_wer_score": 3.058721183123097, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.6390264026402644e-05, | |
| "loss": 0.477, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "eval_loss": 0.46055227518081665, | |
| "eval_runtime": 3.7594, | |
| "eval_samples_per_second": 8.512, | |
| "eval_steps_per_second": 4.256, | |
| "eval_wer_score": 2.9904306220095696, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.5874587458745876e-05, | |
| "loss": 0.4702, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "eval_loss": 0.4569026827812195, | |
| "eval_runtime": 3.9386, | |
| "eval_samples_per_second": 8.125, | |
| "eval_steps_per_second": 4.062, | |
| "eval_wer_score": 2.941278816876903, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 4.5358910891089114e-05, | |
| "loss": 0.4673, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "eval_loss": 0.45087775588035583, | |
| "eval_runtime": 2.9403, | |
| "eval_samples_per_second": 10.883, | |
| "eval_steps_per_second": 5.442, | |
| "eval_wer_score": 3.01739886907351, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 4.4843234323432346e-05, | |
| "loss": 0.4633, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "eval_loss": 0.4465155601501465, | |
| "eval_runtime": 3.3332, | |
| "eval_samples_per_second": 9.6, | |
| "eval_steps_per_second": 4.8, | |
| "eval_wer_score": 3.111787733797303, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 4.432755775577558e-05, | |
| "loss": 0.4579, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "eval_loss": 0.44231322407722473, | |
| "eval_runtime": 3.3294, | |
| "eval_samples_per_second": 9.611, | |
| "eval_steps_per_second": 4.806, | |
| "eval_wer_score": 3.1004784688995217, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 4.3811881188118816e-05, | |
| "loss": 0.4522, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "eval_loss": 0.4409943222999573, | |
| "eval_runtime": 4.0034, | |
| "eval_samples_per_second": 7.993, | |
| "eval_steps_per_second": 3.997, | |
| "eval_wer_score": 3.08916920400174, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 4.329620462046205e-05, | |
| "loss": 0.4495, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "eval_loss": 0.4370802044868469, | |
| "eval_runtime": 2.7858, | |
| "eval_samples_per_second": 11.487, | |
| "eval_steps_per_second": 5.743, | |
| "eval_wer_score": 3.1792083514571554, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 4.278052805280528e-05, | |
| "loss": 0.4498, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "eval_loss": 0.43357548117637634, | |
| "eval_runtime": 2.6149, | |
| "eval_samples_per_second": 12.238, | |
| "eval_steps_per_second": 6.119, | |
| "eval_wer_score": 3.1222270552414093, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 4.226485148514852e-05, | |
| "loss": 0.4461, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "eval_loss": 0.4354948401451111, | |
| "eval_runtime": 2.7939, | |
| "eval_samples_per_second": 11.454, | |
| "eval_steps_per_second": 5.727, | |
| "eval_wer_score": 3.187037842540235, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 4.174917491749175e-05, | |
| "loss": 0.4435, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "eval_loss": 0.4297381043434143, | |
| "eval_runtime": 2.5337, | |
| "eval_samples_per_second": 12.63, | |
| "eval_steps_per_second": 6.315, | |
| "eval_wer_score": 3.207046541974772, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 4.123349834983499e-05, | |
| "loss": 0.4392, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "eval_loss": 0.4316774606704712, | |
| "eval_runtime": 2.6742, | |
| "eval_samples_per_second": 11.966, | |
| "eval_steps_per_second": 5.983, | |
| "eval_wer_score": 3.1857329273597217, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 4.071782178217822e-05, | |
| "loss": 0.4385, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "eval_loss": 0.42789211869239807, | |
| "eval_runtime": 2.5419, | |
| "eval_samples_per_second": 12.589, | |
| "eval_steps_per_second": 6.294, | |
| "eval_wer_score": 3.186602870813397, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 4.020214521452145e-05, | |
| "loss": 0.4352, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "eval_loss": 0.4274422526359558, | |
| "eval_runtime": 2.5697, | |
| "eval_samples_per_second": 12.453, | |
| "eval_steps_per_second": 6.226, | |
| "eval_wer_score": 3.23836450630709, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 3.968646864686469e-05, | |
| "loss": 0.4354, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "eval_loss": 0.42688965797424316, | |
| "eval_runtime": 2.9492, | |
| "eval_samples_per_second": 10.85, | |
| "eval_steps_per_second": 5.425, | |
| "eval_wer_score": 3.192692474989126, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 3.917079207920793e-05, | |
| "loss": 0.4336, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "eval_loss": 0.42364591360092163, | |
| "eval_runtime": 3.3776, | |
| "eval_samples_per_second": 9.474, | |
| "eval_steps_per_second": 4.737, | |
| "eval_wer_score": 3.1705089169204004, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 3.865511551155115e-05, | |
| "loss": 0.4315, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "eval_loss": 0.42294472455978394, | |
| "eval_runtime": 3.4043, | |
| "eval_samples_per_second": 9.4, | |
| "eval_steps_per_second": 4.7, | |
| "eval_wer_score": 3.2618529795563287, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 3.813943894389439e-05, | |
| "loss": 0.4297, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "eval_loss": 0.4200877547264099, | |
| "eval_runtime": 3.2244, | |
| "eval_samples_per_second": 9.924, | |
| "eval_steps_per_second": 4.962, | |
| "eval_wer_score": 3.2818616789908654, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 3.762376237623763e-05, | |
| "loss": 0.429, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "eval_loss": 0.4193739593029022, | |
| "eval_runtime": 3.0348, | |
| "eval_samples_per_second": 10.544, | |
| "eval_steps_per_second": 5.272, | |
| "eval_wer_score": 3.281426707264028, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 3.710808580858086e-05, | |
| "loss": 0.4272, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "eval_loss": 0.41733482480049133, | |
| "eval_runtime": 3.4043, | |
| "eval_samples_per_second": 9.4, | |
| "eval_steps_per_second": 4.7, | |
| "eval_wer_score": 3.2501087429317095, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 3.659240924092409e-05, | |
| "loss": 0.4266, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "eval_loss": 0.4167550206184387, | |
| "eval_runtime": 3.1991, | |
| "eval_samples_per_second": 10.003, | |
| "eval_steps_per_second": 5.001, | |
| "eval_wer_score": 3.240539364941279, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 3.607673267326733e-05, | |
| "loss": 0.4257, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "eval_loss": 0.4143298268318176, | |
| "eval_runtime": 3.1177, | |
| "eval_samples_per_second": 10.264, | |
| "eval_steps_per_second": 5.132, | |
| "eval_wer_score": 3.2570682905611137, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 3.556105610561056e-05, | |
| "loss": 0.424, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "eval_loss": 0.4156000018119812, | |
| "eval_runtime": 3.2132, | |
| "eval_samples_per_second": 9.959, | |
| "eval_steps_per_second": 4.979, | |
| "eval_wer_score": 3.2035667681600697, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 3.50453795379538e-05, | |
| "loss": 0.4249, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "eval_loss": 0.4149695038795471, | |
| "eval_runtime": 3.367, | |
| "eval_samples_per_second": 9.504, | |
| "eval_steps_per_second": 4.752, | |
| "eval_wer_score": 3.2470639408438453, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 3.452970297029703e-05, | |
| "loss": 0.422, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "eval_loss": 0.4136950373649597, | |
| "eval_runtime": 3.0485, | |
| "eval_samples_per_second": 10.497, | |
| "eval_steps_per_second": 5.249, | |
| "eval_wer_score": 3.257938234014789, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 3.4014026402640264e-05, | |
| "loss": 0.4193, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "eval_loss": 0.41170167922973633, | |
| "eval_runtime": 3.4654, | |
| "eval_samples_per_second": 9.234, | |
| "eval_steps_per_second": 4.617, | |
| "eval_wer_score": 3.2470639408438453, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 3.34983498349835e-05, | |
| "loss": 0.4179, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "eval_loss": 0.412392795085907, | |
| "eval_runtime": 3.3054, | |
| "eval_samples_per_second": 9.681, | |
| "eval_steps_per_second": 4.841, | |
| "eval_wer_score": 3.2440191387559807, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 3.2982673267326734e-05, | |
| "loss": 0.4164, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "eval_loss": 0.41018491983413696, | |
| "eval_runtime": 3.215, | |
| "eval_samples_per_second": 9.953, | |
| "eval_steps_per_second": 4.977, | |
| "eval_wer_score": 3.2679425837320575, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 3.2466996699669965e-05, | |
| "loss": 0.4121, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "eval_loss": 0.4093266427516937, | |
| "eval_runtime": 3.3525, | |
| "eval_samples_per_second": 9.545, | |
| "eval_steps_per_second": 4.773, | |
| "eval_wer_score": 3.222705524140931, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 3.1951320132013203e-05, | |
| "loss": 0.4103, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "eval_loss": 0.4072987139225006, | |
| "eval_runtime": 3.3812, | |
| "eval_samples_per_second": 9.464, | |
| "eval_steps_per_second": 4.732, | |
| "eval_wer_score": 3.1874728142670725, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 3.1435643564356435e-05, | |
| "loss": 0.411, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "eval_loss": 0.4119817614555359, | |
| "eval_runtime": 1.9138, | |
| "eval_samples_per_second": 16.721, | |
| "eval_steps_per_second": 8.36, | |
| "eval_wer_score": 3.201391909525881, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 3.0919966996699673e-05, | |
| "loss": 0.4095, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "eval_loss": 0.4093400537967682, | |
| "eval_runtime": 2.0537, | |
| "eval_samples_per_second": 15.582, | |
| "eval_steps_per_second": 7.791, | |
| "eval_wer_score": 3.218790778599391, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 3.0404290429042902e-05, | |
| "loss": 0.4093, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "eval_loss": 0.4096407890319824, | |
| "eval_runtime": 1.9992, | |
| "eval_samples_per_second": 16.006, | |
| "eval_steps_per_second": 8.003, | |
| "eval_wer_score": 3.1705089169204004, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 2.988861386138614e-05, | |
| "loss": 0.4081, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "eval_loss": 0.4094192683696747, | |
| "eval_runtime": 2.0819, | |
| "eval_samples_per_second": 15.371, | |
| "eval_steps_per_second": 7.685, | |
| "eval_wer_score": 3.1966072205306655, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 2.9372937293729375e-05, | |
| "loss": 0.4083, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "eval_loss": 0.4079236090183258, | |
| "eval_runtime": 1.9742, | |
| "eval_samples_per_second": 16.209, | |
| "eval_steps_per_second": 8.104, | |
| "eval_wer_score": 3.2231404958677685, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 2.885726072607261e-05, | |
| "loss": 0.4065, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "eval_loss": 0.4078274965286255, | |
| "eval_runtime": 1.9767, | |
| "eval_samples_per_second": 16.189, | |
| "eval_steps_per_second": 8.094, | |
| "eval_wer_score": 3.2292301000434973, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 2.834158415841584e-05, | |
| "loss": 0.4074, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "eval_loss": 0.40426379442214966, | |
| "eval_runtime": 2.1917, | |
| "eval_samples_per_second": 14.601, | |
| "eval_steps_per_second": 7.3, | |
| "eval_wer_score": 3.2127011744236627, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 2.7825907590759077e-05, | |
| "loss": 0.4066, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "eval_loss": 0.40665364265441895, | |
| "eval_runtime": 2.255, | |
| "eval_samples_per_second": 14.191, | |
| "eval_steps_per_second": 7.095, | |
| "eval_wer_score": 3.2053066550674205, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 2.731023102310231e-05, | |
| "loss": 0.405, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "eval_loss": 0.4042993485927582, | |
| "eval_runtime": 2.1192, | |
| "eval_samples_per_second": 15.1, | |
| "eval_steps_per_second": 7.55, | |
| "eval_wer_score": 3.2448890822096566, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "learning_rate": 2.6794554455445547e-05, | |
| "loss": 0.4051, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "eval_loss": 0.4049427807331085, | |
| "eval_runtime": 1.9064, | |
| "eval_samples_per_second": 16.786, | |
| "eval_steps_per_second": 8.393, | |
| "eval_wer_score": 3.2109612875163114, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "learning_rate": 2.6278877887788778e-05, | |
| "loss": 0.4045, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "eval_loss": 0.4028187394142151, | |
| "eval_runtime": 2.0181, | |
| "eval_samples_per_second": 15.856, | |
| "eval_steps_per_second": 7.928, | |
| "eval_wer_score": 3.2035667681600697, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "learning_rate": 2.5763201320132013e-05, | |
| "loss": 0.4045, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "eval_loss": 0.4024648070335388, | |
| "eval_runtime": 2.027, | |
| "eval_samples_per_second": 15.787, | |
| "eval_steps_per_second": 7.894, | |
| "eval_wer_score": 3.1757285776424533, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "learning_rate": 2.5247524752475248e-05, | |
| "loss": 0.406, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "eval_loss": 0.400738000869751, | |
| "eval_runtime": 2.1415, | |
| "eval_samples_per_second": 14.942, | |
| "eval_steps_per_second": 7.471, | |
| "eval_wer_score": 3.204001739886907, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "learning_rate": 2.4731848184818483e-05, | |
| "loss": 0.4021, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "eval_loss": 0.40221601724624634, | |
| "eval_runtime": 2.237, | |
| "eval_samples_per_second": 14.305, | |
| "eval_steps_per_second": 7.152, | |
| "eval_wer_score": 3.144410613310135, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "learning_rate": 2.4216171617161718e-05, | |
| "loss": 0.4026, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "eval_loss": 0.4028313159942627, | |
| "eval_runtime": 2.2063, | |
| "eval_samples_per_second": 14.504, | |
| "eval_steps_per_second": 7.252, | |
| "eval_wer_score": 3.168769030013049, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "learning_rate": 2.370049504950495e-05, | |
| "loss": 0.4014, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "eval_loss": 0.4026516079902649, | |
| "eval_runtime": 2.131, | |
| "eval_samples_per_second": 15.016, | |
| "eval_steps_per_second": 7.508, | |
| "eval_wer_score": 3.2000869943453676, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "learning_rate": 2.3184818481848185e-05, | |
| "loss": 0.4015, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "eval_loss": 0.402204692363739, | |
| "eval_runtime": 2.0851, | |
| "eval_samples_per_second": 15.347, | |
| "eval_steps_per_second": 7.673, | |
| "eval_wer_score": 3.19182253153545, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "learning_rate": 2.266914191419142e-05, | |
| "loss": 0.401, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "eval_loss": 0.40174347162246704, | |
| "eval_runtime": 2.1944, | |
| "eval_samples_per_second": 14.583, | |
| "eval_steps_per_second": 7.291, | |
| "eval_wer_score": 3.189212701174424, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "learning_rate": 2.2153465346534655e-05, | |
| "loss": 0.4007, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "eval_loss": 0.40014830231666565, | |
| "eval_runtime": 2.1544, | |
| "eval_samples_per_second": 14.853, | |
| "eval_steps_per_second": 7.427, | |
| "eval_wer_score": 3.2148760330578514, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "learning_rate": 2.1637788778877886e-05, | |
| "loss": 0.399, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "eval_loss": 0.3999301791191101, | |
| "eval_runtime": 2.196, | |
| "eval_samples_per_second": 14.572, | |
| "eval_steps_per_second": 7.286, | |
| "eval_wer_score": 3.177468464549804, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "learning_rate": 2.1122112211221125e-05, | |
| "loss": 0.4004, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "eval_loss": 0.40041935443878174, | |
| "eval_runtime": 2.2406, | |
| "eval_samples_per_second": 14.282, | |
| "eval_steps_per_second": 7.141, | |
| "eval_wer_score": 3.186602870813397, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "learning_rate": 2.0606435643564356e-05, | |
| "loss": 0.3988, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "eval_loss": 0.4005739092826843, | |
| "eval_runtime": 2.1589, | |
| "eval_samples_per_second": 14.822, | |
| "eval_steps_per_second": 7.411, | |
| "eval_wer_score": 3.2235754675946064, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "learning_rate": 2.009075907590759e-05, | |
| "loss": 0.3985, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "eval_loss": 0.4012880325317383, | |
| "eval_runtime": 2.1243, | |
| "eval_samples_per_second": 15.064, | |
| "eval_steps_per_second": 7.532, | |
| "eval_wer_score": 3.2083514571552847, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "learning_rate": 1.9575082508250826e-05, | |
| "loss": 0.3995, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "eval_loss": 0.3977855443954468, | |
| "eval_runtime": 2.1918, | |
| "eval_samples_per_second": 14.6, | |
| "eval_steps_per_second": 7.3, | |
| "eval_wer_score": 3.192257503262288, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "learning_rate": 1.905940594059406e-05, | |
| "loss": 0.3975, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "eval_loss": 0.39725542068481445, | |
| "eval_runtime": 2.2663, | |
| "eval_samples_per_second": 14.12, | |
| "eval_steps_per_second": 7.06, | |
| "eval_wer_score": 3.174858634188778, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "learning_rate": 1.8543729372937293e-05, | |
| "loss": 0.3976, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "eval_loss": 0.39580366015434265, | |
| "eval_runtime": 2.2795, | |
| "eval_samples_per_second": 14.038, | |
| "eval_steps_per_second": 7.019, | |
| "eval_wer_score": 3.12396694214876, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 1.8028052805280528e-05, | |
| "loss": 0.3977, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "eval_loss": 0.3961202800273895, | |
| "eval_runtime": 2.1696, | |
| "eval_samples_per_second": 14.749, | |
| "eval_steps_per_second": 7.374, | |
| "eval_wer_score": 3.165724227925185, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "learning_rate": 1.7512376237623763e-05, | |
| "loss": 0.3945, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "eval_loss": 0.396453857421875, | |
| "eval_runtime": 1.988, | |
| "eval_samples_per_second": 16.097, | |
| "eval_steps_per_second": 8.048, | |
| "eval_wer_score": 3.1805132666376688, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "learning_rate": 1.6996699669966998e-05, | |
| "loss": 0.3962, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "eval_loss": 0.39566469192504883, | |
| "eval_runtime": 1.9188, | |
| "eval_samples_per_second": 16.677, | |
| "eval_steps_per_second": 8.339, | |
| "eval_wer_score": 3.1852979556328838, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "learning_rate": 1.648102310231023e-05, | |
| "loss": 0.3953, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "eval_loss": 0.39734578132629395, | |
| "eval_runtime": 2.1888, | |
| "eval_samples_per_second": 14.62, | |
| "eval_steps_per_second": 7.31, | |
| "eval_wer_score": 3.13571117877338, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "learning_rate": 1.5965346534653468e-05, | |
| "loss": 0.3896, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "eval_loss": 0.3978061079978943, | |
| "eval_runtime": 2.2259, | |
| "eval_samples_per_second": 14.376, | |
| "eval_steps_per_second": 7.188, | |
| "eval_wer_score": 3.110047846889952, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 2.07, | |
| "learning_rate": 1.54496699669967e-05, | |
| "loss": 0.3907, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 2.07, | |
| "eval_loss": 0.3961105942726135, | |
| "eval_runtime": 2.5092, | |
| "eval_samples_per_second": 12.753, | |
| "eval_steps_per_second": 6.377, | |
| "eval_wer_score": 3.1274467159634622, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 2.1, | |
| "learning_rate": 1.4933993399339935e-05, | |
| "loss": 0.3889, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 2.1, | |
| "eval_loss": 0.3963559865951538, | |
| "eval_runtime": 2.1557, | |
| "eval_samples_per_second": 14.845, | |
| "eval_steps_per_second": 7.422, | |
| "eval_wer_score": 3.190517616354937, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 2.13, | |
| "learning_rate": 1.4418316831683168e-05, | |
| "loss": 0.3902, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 2.13, | |
| "eval_loss": 0.3959140479564667, | |
| "eval_runtime": 2.1754, | |
| "eval_samples_per_second": 14.71, | |
| "eval_steps_per_second": 7.355, | |
| "eval_wer_score": 3.1857329273597217, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "learning_rate": 1.3902640264026403e-05, | |
| "loss": 0.3902, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "eval_loss": 0.3955221176147461, | |
| "eval_runtime": 2.075, | |
| "eval_samples_per_second": 15.422, | |
| "eval_steps_per_second": 7.711, | |
| "eval_wer_score": 3.162244454110483, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "learning_rate": 1.3386963696369636e-05, | |
| "loss": 0.3891, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "eval_loss": 0.39533841609954834, | |
| "eval_runtime": 2.1894, | |
| "eval_samples_per_second": 14.616, | |
| "eval_steps_per_second": 7.308, | |
| "eval_wer_score": 3.17442366246194, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 2.23, | |
| "learning_rate": 1.2871287128712873e-05, | |
| "loss": 0.3886, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 2.23, | |
| "eval_loss": 0.3946349620819092, | |
| "eval_runtime": 2.1424, | |
| "eval_samples_per_second": 14.936, | |
| "eval_steps_per_second": 7.468, | |
| "eval_wer_score": 3.142670726402784, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "learning_rate": 1.2355610561056106e-05, | |
| "loss": 0.388, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "eval_loss": 0.3959529995918274, | |
| "eval_runtime": 2.1922, | |
| "eval_samples_per_second": 14.597, | |
| "eval_steps_per_second": 7.299, | |
| "eval_wer_score": 3.1309264897781643, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "learning_rate": 1.1839933993399341e-05, | |
| "loss": 0.3888, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "eval_loss": 0.39452987909317017, | |
| "eval_runtime": 2.1798, | |
| "eval_samples_per_second": 14.68, | |
| "eval_steps_per_second": 7.34, | |
| "eval_wer_score": 3.13571117877338, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "learning_rate": 1.1324257425742574e-05, | |
| "loss": 0.3891, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "eval_loss": 0.39532509446144104, | |
| "eval_runtime": 2.0097, | |
| "eval_samples_per_second": 15.923, | |
| "eval_steps_per_second": 7.961, | |
| "eval_wer_score": 3.121357111787734, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 2.35, | |
| "learning_rate": 1.080858085808581e-05, | |
| "loss": 0.3883, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 2.35, | |
| "eval_loss": 0.39508694410324097, | |
| "eval_runtime": 2.1667, | |
| "eval_samples_per_second": 14.769, | |
| "eval_steps_per_second": 7.385, | |
| "eval_wer_score": 3.1361461505002173, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 2.38, | |
| "learning_rate": 1.0292904290429044e-05, | |
| "loss": 0.3876, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 2.38, | |
| "eval_loss": 0.39381179213523865, | |
| "eval_runtime": 2.1778, | |
| "eval_samples_per_second": 14.693, | |
| "eval_steps_per_second": 7.347, | |
| "eval_wer_score": 3.1309264897781643, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 2.41, | |
| "learning_rate": 9.777227722772278e-06, | |
| "loss": 0.3879, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 2.41, | |
| "eval_loss": 0.39416271448135376, | |
| "eval_runtime": 2.2392, | |
| "eval_samples_per_second": 14.291, | |
| "eval_steps_per_second": 7.145, | |
| "eval_wer_score": 3.148325358851675, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 2.44, | |
| "learning_rate": 9.261551155115513e-06, | |
| "loss": 0.386, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 2.44, | |
| "eval_loss": 0.39405977725982666, | |
| "eval_runtime": 2.2548, | |
| "eval_samples_per_second": 14.192, | |
| "eval_steps_per_second": 7.096, | |
| "eval_wer_score": 3.15311004784689, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "learning_rate": 8.745874587458746e-06, | |
| "loss": 0.3862, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "eval_loss": 0.3948515057563782, | |
| "eval_runtime": 2.2104, | |
| "eval_samples_per_second": 14.477, | |
| "eval_steps_per_second": 7.239, | |
| "eval_wer_score": 3.1378860374075686, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 2.51, | |
| "learning_rate": 8.230198019801981e-06, | |
| "loss": 0.3876, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 2.51, | |
| "eval_loss": 0.3954794704914093, | |
| "eval_runtime": 2.1178, | |
| "eval_samples_per_second": 15.11, | |
| "eval_steps_per_second": 7.555, | |
| "eval_wer_score": 3.1470204436711615, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 2.54, | |
| "learning_rate": 7.714521452145216e-06, | |
| "loss": 0.3876, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 2.54, | |
| "eval_loss": 0.3942318856716156, | |
| "eval_runtime": 2.1511, | |
| "eval_samples_per_second": 14.876, | |
| "eval_steps_per_second": 7.438, | |
| "eval_wer_score": 3.1326663766855156, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 2.57, | |
| "learning_rate": 7.198844884488449e-06, | |
| "loss": 0.3858, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 2.57, | |
| "eval_loss": 0.39369016885757446, | |
| "eval_runtime": 2.2193, | |
| "eval_samples_per_second": 14.419, | |
| "eval_steps_per_second": 7.21, | |
| "eval_wer_score": 3.1270117442366248, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "learning_rate": 6.6831683168316835e-06, | |
| "loss": 0.3855, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "eval_loss": 0.3940153121948242, | |
| "eval_runtime": 2.2424, | |
| "eval_samples_per_second": 14.27, | |
| "eval_steps_per_second": 7.135, | |
| "eval_wer_score": 3.1491953023053503, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 2.63, | |
| "learning_rate": 6.167491749174918e-06, | |
| "loss": 0.3864, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 2.63, | |
| "eval_loss": 0.3938477337360382, | |
| "eval_runtime": 2.1981, | |
| "eval_samples_per_second": 14.558, | |
| "eval_steps_per_second": 7.279, | |
| "eval_wer_score": 3.1431056981296215, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "learning_rate": 5.651815181518152e-06, | |
| "loss": 0.3869, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "eval_loss": 0.3936881422996521, | |
| "eval_runtime": 2.2164, | |
| "eval_samples_per_second": 14.438, | |
| "eval_steps_per_second": 7.219, | |
| "eval_wer_score": 3.1583297085689432, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 2.69, | |
| "learning_rate": 5.136138613861386e-06, | |
| "loss": 0.3841, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 2.69, | |
| "eval_loss": 0.39349794387817383, | |
| "eval_runtime": 2.2175, | |
| "eval_samples_per_second": 14.43, | |
| "eval_steps_per_second": 7.215, | |
| "eval_wer_score": 3.1278816876903, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "learning_rate": 4.62046204620462e-06, | |
| "loss": 0.3866, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "eval_loss": 0.3936805725097656, | |
| "eval_runtime": 1.9094, | |
| "eval_samples_per_second": 16.759, | |
| "eval_steps_per_second": 8.38, | |
| "eval_wer_score": 3.119182253153545, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 2.75, | |
| "learning_rate": 4.104785478547855e-06, | |
| "loss": 0.3848, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 2.75, | |
| "eval_loss": 0.3931500315666199, | |
| "eval_runtime": 2.1055, | |
| "eval_samples_per_second": 15.198, | |
| "eval_steps_per_second": 7.599, | |
| "eval_wer_score": 3.1235319704219227, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 2.78, | |
| "learning_rate": 3.589108910891089e-06, | |
| "loss": 0.3849, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 2.78, | |
| "eval_loss": 0.39277058839797974, | |
| "eval_runtime": 2.1028, | |
| "eval_samples_per_second": 15.218, | |
| "eval_steps_per_second": 7.609, | |
| "eval_wer_score": 3.1313614615050023, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "learning_rate": 3.073432343234324e-06, | |
| "loss": 0.383, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "eval_loss": 0.3926939368247986, | |
| "eval_runtime": 2.1315, | |
| "eval_samples_per_second": 15.013, | |
| "eval_steps_per_second": 7.506, | |
| "eval_wer_score": 3.1230969986950847, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "learning_rate": 2.557755775577558e-06, | |
| "loss": 0.3839, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "eval_loss": 0.39277368783950806, | |
| "eval_runtime": 2.1656, | |
| "eval_samples_per_second": 14.776, | |
| "eval_steps_per_second": 7.388, | |
| "eval_wer_score": 3.115702479338843, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "learning_rate": 2.042079207920792e-06, | |
| "loss": 0.3843, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "eval_loss": 0.3926578164100647, | |
| "eval_runtime": 2.2527, | |
| "eval_samples_per_second": 14.205, | |
| "eval_steps_per_second": 7.103, | |
| "eval_wer_score": 3.1226620269682472, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "learning_rate": 1.5264026402640265e-06, | |
| "loss": 0.3862, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "eval_loss": 0.3923312723636627, | |
| "eval_runtime": 2.1236, | |
| "eval_samples_per_second": 15.069, | |
| "eval_steps_per_second": 7.534, | |
| "eval_wer_score": 3.1287516311439756, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 2.94, | |
| "learning_rate": 1.0107260726072606e-06, | |
| "loss": 0.3848, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 2.94, | |
| "eval_loss": 0.3923192024230957, | |
| "eval_runtime": 2.1085, | |
| "eval_samples_per_second": 15.177, | |
| "eval_steps_per_second": 7.588, | |
| "eval_wer_score": 3.1448455850369728, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 2.97, | |
| "learning_rate": 4.950495049504951e-07, | |
| "loss": 0.3856, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 2.97, | |
| "eval_loss": 0.3922309875488281, | |
| "eval_runtime": 2.9258, | |
| "eval_samples_per_second": 10.937, | |
| "eval_steps_per_second": 5.469, | |
| "eval_wer_score": 3.133536320139191, | |
| "step": 4800 | |
| } | |
| ], | |
| "max_steps": 4848, | |
| "num_train_epochs": 3, | |
| "total_flos": 5.746828131664773e+17, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |