| { | |
| "best_metric": 0.3793087899684906, | |
| "best_model_checkpoint": "/data/users/yanyang/Projects/COCO_Caption_Refine/debug/git/2023-09-11-23-23-40/checkpoint-4800", | |
| "epoch": 2.9702970297029703, | |
| "global_step": 4800, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.948432343234324e-05, | |
| "loss": 6.0088, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "eval_loss": 2.9973134994506836, | |
| "eval_runtime": 2.3906, | |
| "eval_samples_per_second": 13.386, | |
| "eval_steps_per_second": 6.693, | |
| "eval_wer_score": 3.802087864288821, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.8968646864686466e-05, | |
| "loss": 1.3576, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "eval_loss": 0.5444818139076233, | |
| "eval_runtime": 2.2435, | |
| "eval_samples_per_second": 14.264, | |
| "eval_steps_per_second": 7.132, | |
| "eval_wer_score": 3.5702479338842976, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.8452970297029704e-05, | |
| "loss": 0.5214, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "eval_loss": 0.4757494628429413, | |
| "eval_runtime": 2.3157, | |
| "eval_samples_per_second": 13.818, | |
| "eval_steps_per_second": 6.909, | |
| "eval_wer_score": 4.052196607220531, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.793729372937294e-05, | |
| "loss": 0.4845, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "eval_loss": 0.4562256336212158, | |
| "eval_runtime": 2.3849, | |
| "eval_samples_per_second": 13.418, | |
| "eval_steps_per_second": 6.709, | |
| "eval_wer_score": 4.051326663766855, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.7421617161716174e-05, | |
| "loss": 0.4694, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "eval_loss": 0.4434777796268463, | |
| "eval_runtime": 2.3886, | |
| "eval_samples_per_second": 13.397, | |
| "eval_steps_per_second": 6.698, | |
| "eval_wer_score": 4.0582862113962594, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.6905940594059406e-05, | |
| "loss": 0.4598, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "eval_loss": 0.43529027700424194, | |
| "eval_runtime": 2.3401, | |
| "eval_samples_per_second": 13.674, | |
| "eval_steps_per_second": 6.837, | |
| "eval_wer_score": 4.02131361461505, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.6390264026402644e-05, | |
| "loss": 0.4499, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "eval_loss": 0.4303966462612152, | |
| "eval_runtime": 2.386, | |
| "eval_samples_per_second": 13.412, | |
| "eval_steps_per_second": 6.706, | |
| "eval_wer_score": 4.097433666811657, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.5874587458745876e-05, | |
| "loss": 0.4444, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "eval_loss": 0.4260537624359131, | |
| "eval_runtime": 2.2892, | |
| "eval_samples_per_second": 13.979, | |
| "eval_steps_per_second": 6.989, | |
| "eval_wer_score": 4.073510221835581, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 4.5358910891089114e-05, | |
| "loss": 0.4419, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "eval_loss": 0.4221154451370239, | |
| "eval_runtime": 2.3303, | |
| "eval_samples_per_second": 13.732, | |
| "eval_steps_per_second": 6.866, | |
| "eval_wer_score": 4.101348412353197, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 4.4843234323432346e-05, | |
| "loss": 0.439, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "eval_loss": 0.42153507471084595, | |
| "eval_runtime": 2.3513, | |
| "eval_samples_per_second": 13.61, | |
| "eval_steps_per_second": 6.805, | |
| "eval_wer_score": 4.062200956937799, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 4.432755775577558e-05, | |
| "loss": 0.4339, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "eval_loss": 0.42045870423316956, | |
| "eval_runtime": 2.2803, | |
| "eval_samples_per_second": 14.034, | |
| "eval_steps_per_second": 7.017, | |
| "eval_wer_score": 4.056546324488909, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 4.3811881188118816e-05, | |
| "loss": 0.4295, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "eval_loss": 0.4192642569541931, | |
| "eval_runtime": 2.3998, | |
| "eval_samples_per_second": 13.335, | |
| "eval_steps_per_second": 6.667, | |
| "eval_wer_score": 4.061765985210961, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 4.329620462046205e-05, | |
| "loss": 0.4275, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "eval_loss": 0.4134916663169861, | |
| "eval_runtime": 1.7005, | |
| "eval_samples_per_second": 18.818, | |
| "eval_steps_per_second": 9.409, | |
| "eval_wer_score": 4.1200521966072206, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 4.278052805280528e-05, | |
| "loss": 0.4278, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "eval_loss": 0.4126208424568176, | |
| "eval_runtime": 2.7168, | |
| "eval_samples_per_second": 11.779, | |
| "eval_steps_per_second": 5.889, | |
| "eval_wer_score": 4.035232709873858, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 4.226485148514852e-05, | |
| "loss": 0.4251, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "eval_loss": 0.41297051310539246, | |
| "eval_runtime": 2.3567, | |
| "eval_samples_per_second": 13.578, | |
| "eval_steps_per_second": 6.789, | |
| "eval_wer_score": 4.082644628099174, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 4.174917491749175e-05, | |
| "loss": 0.4224, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "eval_loss": 0.41018015146255493, | |
| "eval_runtime": 1.6739, | |
| "eval_samples_per_second": 19.117, | |
| "eval_steps_per_second": 9.558, | |
| "eval_wer_score": 4.034362766420183, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 4.123349834983499e-05, | |
| "loss": 0.4186, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "eval_loss": 0.40729203820228577, | |
| "eval_runtime": 3.0395, | |
| "eval_samples_per_second": 10.528, | |
| "eval_steps_per_second": 5.264, | |
| "eval_wer_score": 4.054806437581557, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 4.071782178217822e-05, | |
| "loss": 0.4186, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "eval_loss": 0.4050724506378174, | |
| "eval_runtime": 1.6968, | |
| "eval_samples_per_second": 18.859, | |
| "eval_steps_per_second": 9.429, | |
| "eval_wer_score": 4.0669856459330145, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 4.020214521452145e-05, | |
| "loss": 0.4159, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "eval_loss": 0.4054299592971802, | |
| "eval_runtime": 2.3306, | |
| "eval_samples_per_second": 13.73, | |
| "eval_steps_per_second": 6.865, | |
| "eval_wer_score": 4.100043497172684, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 3.968646864686469e-05, | |
| "loss": 0.4157, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "eval_loss": 0.4036748707294464, | |
| "eval_runtime": 2.3387, | |
| "eval_samples_per_second": 13.683, | |
| "eval_steps_per_second": 6.841, | |
| "eval_wer_score": 4.08133971291866, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 3.917079207920793e-05, | |
| "loss": 0.4141, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "eval_loss": 0.40399593114852905, | |
| "eval_runtime": 1.6717, | |
| "eval_samples_per_second": 19.142, | |
| "eval_steps_per_second": 9.571, | |
| "eval_wer_score": 4.090474119182253, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 3.865511551155115e-05, | |
| "loss": 0.4119, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "eval_loss": 0.4020461142063141, | |
| "eval_runtime": 2.2547, | |
| "eval_samples_per_second": 14.192, | |
| "eval_steps_per_second": 7.096, | |
| "eval_wer_score": 4.063505872118312, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 3.813943894389439e-05, | |
| "loss": 0.4104, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "eval_loss": 0.39923593401908875, | |
| "eval_runtime": 2.3937, | |
| "eval_samples_per_second": 13.368, | |
| "eval_steps_per_second": 6.684, | |
| "eval_wer_score": 4.0487168334058286, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 3.762376237623763e-05, | |
| "loss": 0.4104, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "eval_loss": 0.39828553795814514, | |
| "eval_runtime": 1.6946, | |
| "eval_samples_per_second": 18.883, | |
| "eval_steps_per_second": 9.442, | |
| "eval_wer_score": 4.107438016528926, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 3.710808580858086e-05, | |
| "loss": 0.4086, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "eval_loss": 0.3990360200405121, | |
| "eval_runtime": 2.3013, | |
| "eval_samples_per_second": 13.905, | |
| "eval_steps_per_second": 6.953, | |
| "eval_wer_score": 4.1022183558068726, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 3.659240924092409e-05, | |
| "loss": 0.4082, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "eval_loss": 0.39795249700546265, | |
| "eval_runtime": 2.3895, | |
| "eval_samples_per_second": 13.392, | |
| "eval_steps_per_second": 6.696, | |
| "eval_wer_score": 4.086124401913875, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 3.607673267326733e-05, | |
| "loss": 0.4071, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "eval_loss": 0.39599987864494324, | |
| "eval_runtime": 1.6611, | |
| "eval_samples_per_second": 19.265, | |
| "eval_steps_per_second": 9.632, | |
| "eval_wer_score": 4.075250108742932, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 3.556105610561056e-05, | |
| "loss": 0.4057, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "eval_loss": 0.39804807305336, | |
| "eval_runtime": 2.3409, | |
| "eval_samples_per_second": 13.67, | |
| "eval_steps_per_second": 6.835, | |
| "eval_wer_score": 4.056546324488909, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 3.50453795379538e-05, | |
| "loss": 0.4067, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "eval_loss": 0.3962915241718292, | |
| "eval_runtime": 2.295, | |
| "eval_samples_per_second": 13.944, | |
| "eval_steps_per_second": 6.972, | |
| "eval_wer_score": 4.103088299260548, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 3.452970297029703e-05, | |
| "loss": 0.404, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "eval_loss": 0.3967008888721466, | |
| "eval_runtime": 2.3182, | |
| "eval_samples_per_second": 13.804, | |
| "eval_steps_per_second": 6.902, | |
| "eval_wer_score": 4.110047846889952, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 3.4014026402640264e-05, | |
| "loss": 0.4019, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "eval_loss": 0.39423680305480957, | |
| "eval_runtime": 2.2298, | |
| "eval_samples_per_second": 14.351, | |
| "eval_steps_per_second": 7.176, | |
| "eval_wer_score": 4.082644628099174, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 3.34983498349835e-05, | |
| "loss": 0.4002, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "eval_loss": 0.39309296011924744, | |
| "eval_runtime": 2.3184, | |
| "eval_samples_per_second": 13.803, | |
| "eval_steps_per_second": 6.901, | |
| "eval_wer_score": 4.07742496737712, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 3.2982673267326734e-05, | |
| "loss": 0.3975, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "eval_loss": 0.39335134625434875, | |
| "eval_runtime": 2.2755, | |
| "eval_samples_per_second": 14.063, | |
| "eval_steps_per_second": 7.032, | |
| "eval_wer_score": 4.10656807307525, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 3.2466996699669965e-05, | |
| "loss": 0.3932, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "eval_loss": 0.3922082781791687, | |
| "eval_runtime": 2.3564, | |
| "eval_samples_per_second": 13.58, | |
| "eval_steps_per_second": 6.79, | |
| "eval_wer_score": 4.048281861678991, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 3.1951320132013203e-05, | |
| "loss": 0.3922, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "eval_loss": 0.392187237739563, | |
| "eval_runtime": 2.3537, | |
| "eval_samples_per_second": 13.596, | |
| "eval_steps_per_second": 6.798, | |
| "eval_wer_score": 4.079164854284471, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 3.1435643564356435e-05, | |
| "loss": 0.3929, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "eval_loss": 0.39260441064834595, | |
| "eval_runtime": 2.3232, | |
| "eval_samples_per_second": 13.774, | |
| "eval_steps_per_second": 6.887, | |
| "eval_wer_score": 4.020443671161375, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 3.0919966996699673e-05, | |
| "loss": 0.3914, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "eval_loss": 0.3927755355834961, | |
| "eval_runtime": 2.2418, | |
| "eval_samples_per_second": 14.274, | |
| "eval_steps_per_second": 7.137, | |
| "eval_wer_score": 4.090474119182253, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 3.0404290429042902e-05, | |
| "loss": 0.3911, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "eval_loss": 0.39090630412101746, | |
| "eval_runtime": 2.3278, | |
| "eval_samples_per_second": 13.747, | |
| "eval_steps_per_second": 6.873, | |
| "eval_wer_score": 4.040887342322749, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 2.988861386138614e-05, | |
| "loss": 0.3903, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "eval_loss": 0.3916274607181549, | |
| "eval_runtime": 2.2878, | |
| "eval_samples_per_second": 13.987, | |
| "eval_steps_per_second": 6.994, | |
| "eval_wer_score": 4.0426272292301, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 2.9372937293729375e-05, | |
| "loss": 0.3906, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "eval_loss": 0.39041662216186523, | |
| "eval_runtime": 2.3191, | |
| "eval_samples_per_second": 13.798, | |
| "eval_steps_per_second": 6.899, | |
| "eval_wer_score": 4.063505872118312, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 2.885726072607261e-05, | |
| "loss": 0.3887, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "eval_loss": 0.39140254259109497, | |
| "eval_runtime": 2.3005, | |
| "eval_samples_per_second": 13.91, | |
| "eval_steps_per_second": 6.955, | |
| "eval_wer_score": 4.08133971291866, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 2.834158415841584e-05, | |
| "loss": 0.3899, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "eval_loss": 0.39112603664398193, | |
| "eval_runtime": 2.3634, | |
| "eval_samples_per_second": 13.54, | |
| "eval_steps_per_second": 6.77, | |
| "eval_wer_score": 4.0491518051326665, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 2.7825907590759077e-05, | |
| "loss": 0.389, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "eval_loss": 0.3917953073978424, | |
| "eval_runtime": 2.2409, | |
| "eval_samples_per_second": 14.28, | |
| "eval_steps_per_second": 7.14, | |
| "eval_wer_score": 4.098738581992171, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 2.731023102310231e-05, | |
| "loss": 0.3878, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "eval_loss": 0.38973185420036316, | |
| "eval_runtime": 2.3148, | |
| "eval_samples_per_second": 13.824, | |
| "eval_steps_per_second": 6.912, | |
| "eval_wer_score": 4.091779034362767, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "learning_rate": 2.6794554455445547e-05, | |
| "loss": 0.3877, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "eval_loss": 0.387917160987854, | |
| "eval_runtime": 2.3247, | |
| "eval_samples_per_second": 13.765, | |
| "eval_steps_per_second": 6.883, | |
| "eval_wer_score": 4.068290561113527, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "learning_rate": 2.6278877887788778e-05, | |
| "loss": 0.3871, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "eval_loss": 0.3881215453147888, | |
| "eval_runtime": 2.2878, | |
| "eval_samples_per_second": 13.987, | |
| "eval_steps_per_second": 6.994, | |
| "eval_wer_score": 4.076989995650282, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "learning_rate": 2.5763201320132013e-05, | |
| "loss": 0.387, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "eval_loss": 0.3859871029853821, | |
| "eval_runtime": 2.3063, | |
| "eval_samples_per_second": 13.875, | |
| "eval_steps_per_second": 6.937, | |
| "eval_wer_score": 4.069160504567203, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "learning_rate": 2.5247524752475248e-05, | |
| "loss": 0.3887, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "eval_loss": 0.38679689168930054, | |
| "eval_runtime": 2.3448, | |
| "eval_samples_per_second": 13.647, | |
| "eval_steps_per_second": 6.824, | |
| "eval_wer_score": 4.063070900391475, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "learning_rate": 2.4731848184818483e-05, | |
| "loss": 0.3847, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "eval_loss": 0.3876645267009735, | |
| "eval_runtime": 1.6183, | |
| "eval_samples_per_second": 19.774, | |
| "eval_steps_per_second": 9.887, | |
| "eval_wer_score": 4.091344062635929, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "learning_rate": 2.4216171617161718e-05, | |
| "loss": 0.3854, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "eval_loss": 0.3880044221878052, | |
| "eval_runtime": 1.6125, | |
| "eval_samples_per_second": 19.845, | |
| "eval_steps_per_second": 9.923, | |
| "eval_wer_score": 4.040887342322749, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "learning_rate": 2.370049504950495e-05, | |
| "loss": 0.3841, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "eval_loss": 0.38710057735443115, | |
| "eval_runtime": 1.6138, | |
| "eval_samples_per_second": 19.829, | |
| "eval_steps_per_second": 9.914, | |
| "eval_wer_score": 4.064810787298826, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "learning_rate": 2.3184818481848185e-05, | |
| "loss": 0.384, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "eval_loss": 0.3867349326610565, | |
| "eval_runtime": 1.6339, | |
| "eval_samples_per_second": 19.585, | |
| "eval_steps_per_second": 9.793, | |
| "eval_wer_score": 4.063070900391475, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "learning_rate": 2.266914191419142e-05, | |
| "loss": 0.3839, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "eval_loss": 0.38524535298347473, | |
| "eval_runtime": 1.6377, | |
| "eval_samples_per_second": 19.539, | |
| "eval_steps_per_second": 9.77, | |
| "eval_wer_score": 4.076989995650282, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "learning_rate": 2.2153465346534655e-05, | |
| "loss": 0.3833, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "eval_loss": 0.3858553171157837, | |
| "eval_runtime": 1.6388, | |
| "eval_samples_per_second": 19.526, | |
| "eval_steps_per_second": 9.763, | |
| "eval_wer_score": 4.033492822966507, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "learning_rate": 2.1637788778877886e-05, | |
| "loss": 0.3819, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "eval_loss": 0.38464829325675964, | |
| "eval_runtime": 1.6686, | |
| "eval_samples_per_second": 19.178, | |
| "eval_steps_per_second": 9.589, | |
| "eval_wer_score": 4.073510221835581, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "learning_rate": 2.1122112211221125e-05, | |
| "loss": 0.3831, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "eval_loss": 0.3838270902633667, | |
| "eval_runtime": 1.6915, | |
| "eval_samples_per_second": 18.918, | |
| "eval_steps_per_second": 9.459, | |
| "eval_wer_score": 4.0678555893866895, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "learning_rate": 2.0606435643564356e-05, | |
| "loss": 0.3817, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "eval_loss": 0.38364166021347046, | |
| "eval_runtime": 1.6576, | |
| "eval_samples_per_second": 19.305, | |
| "eval_steps_per_second": 9.653, | |
| "eval_wer_score": 4.051761635493693, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "learning_rate": 2.009075907590759e-05, | |
| "loss": 0.3811, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "eval_loss": 0.38430899381637573, | |
| "eval_runtime": 1.6858, | |
| "eval_samples_per_second": 18.982, | |
| "eval_steps_per_second": 9.491, | |
| "eval_wer_score": 4.053936494127882, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "learning_rate": 1.9575082508250826e-05, | |
| "loss": 0.3821, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "eval_loss": 0.3839759826660156, | |
| "eval_runtime": 1.6393, | |
| "eval_samples_per_second": 19.52, | |
| "eval_steps_per_second": 9.76, | |
| "eval_wer_score": 4.08133971291866, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "learning_rate": 1.905940594059406e-05, | |
| "loss": 0.3803, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "eval_loss": 0.3829007148742676, | |
| "eval_runtime": 1.757, | |
| "eval_samples_per_second": 18.212, | |
| "eval_steps_per_second": 9.106, | |
| "eval_wer_score": 4.043932144410613, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "learning_rate": 1.8543729372937293e-05, | |
| "loss": 0.3804, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "eval_loss": 0.3814784288406372, | |
| "eval_runtime": 1.7626, | |
| "eval_samples_per_second": 18.155, | |
| "eval_steps_per_second": 9.078, | |
| "eval_wer_score": 4.068725532840365, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 1.8028052805280528e-05, | |
| "loss": 0.3803, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "eval_loss": 0.382568895816803, | |
| "eval_runtime": 1.7262, | |
| "eval_samples_per_second": 18.538, | |
| "eval_steps_per_second": 9.269, | |
| "eval_wer_score": 4.033927794693345, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "learning_rate": 1.7512376237623763e-05, | |
| "loss": 0.3774, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "eval_loss": 0.38378775119781494, | |
| "eval_runtime": 1.776, | |
| "eval_samples_per_second": 18.018, | |
| "eval_steps_per_second": 9.009, | |
| "eval_wer_score": 4.053936494127882, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "learning_rate": 1.6996699669966998e-05, | |
| "loss": 0.379, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "eval_loss": 0.3831249475479126, | |
| "eval_runtime": 1.7324, | |
| "eval_samples_per_second": 18.471, | |
| "eval_steps_per_second": 9.236, | |
| "eval_wer_score": 4.062635928664637, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "learning_rate": 1.648102310231023e-05, | |
| "loss": 0.3779, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "eval_loss": 0.3829612135887146, | |
| "eval_runtime": 1.7483, | |
| "eval_samples_per_second": 18.304, | |
| "eval_steps_per_second": 9.152, | |
| "eval_wer_score": 4.070465419747716, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "learning_rate": 1.5965346534653468e-05, | |
| "loss": 0.3714, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "eval_loss": 0.38411805033683777, | |
| "eval_runtime": 1.7491, | |
| "eval_samples_per_second": 18.295, | |
| "eval_steps_per_second": 9.147, | |
| "eval_wer_score": 4.074380165289257, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 2.07, | |
| "learning_rate": 1.54496699669967e-05, | |
| "loss": 0.3718, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 2.07, | |
| "eval_loss": 0.38236159086227417, | |
| "eval_runtime": 1.7284, | |
| "eval_samples_per_second": 18.514, | |
| "eval_steps_per_second": 9.257, | |
| "eval_wer_score": 4.076555023923445, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 2.1, | |
| "learning_rate": 1.4933993399339935e-05, | |
| "loss": 0.3705, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 2.1, | |
| "eval_loss": 0.3845175504684448, | |
| "eval_runtime": 1.748, | |
| "eval_samples_per_second": 18.307, | |
| "eval_steps_per_second": 9.153, | |
| "eval_wer_score": 4.075250108742932, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 2.13, | |
| "learning_rate": 1.4418316831683168e-05, | |
| "loss": 0.3719, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 2.13, | |
| "eval_loss": 0.382493793964386, | |
| "eval_runtime": 1.7164, | |
| "eval_samples_per_second": 18.643, | |
| "eval_steps_per_second": 9.322, | |
| "eval_wer_score": 4.0848194867333625, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "learning_rate": 1.3902640264026403e-05, | |
| "loss": 0.3715, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "eval_loss": 0.3831576108932495, | |
| "eval_runtime": 1.7528, | |
| "eval_samples_per_second": 18.257, | |
| "eval_steps_per_second": 9.128, | |
| "eval_wer_score": 4.061765985210961, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "learning_rate": 1.3386963696369636e-05, | |
| "loss": 0.3704, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "eval_loss": 0.38182923197746277, | |
| "eval_runtime": 1.8646, | |
| "eval_samples_per_second": 17.161, | |
| "eval_steps_per_second": 8.581, | |
| "eval_wer_score": 4.0843845150065246, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 2.23, | |
| "learning_rate": 1.2871287128712873e-05, | |
| "loss": 0.3703, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 2.23, | |
| "eval_loss": 0.38130998611450195, | |
| "eval_runtime": 1.7229, | |
| "eval_samples_per_second": 18.574, | |
| "eval_steps_per_second": 9.287, | |
| "eval_wer_score": 4.063070900391475, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "learning_rate": 1.2355610561056106e-05, | |
| "loss": 0.3697, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "eval_loss": 0.381599485874176, | |
| "eval_runtime": 1.7655, | |
| "eval_samples_per_second": 18.126, | |
| "eval_steps_per_second": 9.063, | |
| "eval_wer_score": 4.073510221835581, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "learning_rate": 1.1839933993399341e-05, | |
| "loss": 0.3706, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "eval_loss": 0.3811095952987671, | |
| "eval_runtime": 1.7447, | |
| "eval_samples_per_second": 18.341, | |
| "eval_steps_per_second": 9.171, | |
| "eval_wer_score": 4.06394084384515, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "learning_rate": 1.1324257425742574e-05, | |
| "loss": 0.3709, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "eval_loss": 0.3830665946006775, | |
| "eval_runtime": 1.7245, | |
| "eval_samples_per_second": 18.556, | |
| "eval_steps_per_second": 9.278, | |
| "eval_wer_score": 4.070900391474554, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 2.35, | |
| "learning_rate": 1.080858085808581e-05, | |
| "loss": 0.3698, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 2.35, | |
| "eval_loss": 0.38286030292510986, | |
| "eval_runtime": 1.7434, | |
| "eval_samples_per_second": 18.355, | |
| "eval_steps_per_second": 9.178, | |
| "eval_wer_score": 4.066115702479339, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 2.38, | |
| "learning_rate": 1.0292904290429044e-05, | |
| "loss": 0.3692, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 2.38, | |
| "eval_loss": 0.38252702355384827, | |
| "eval_runtime": 1.7413, | |
| "eval_samples_per_second": 18.377, | |
| "eval_steps_per_second": 9.189, | |
| "eval_wer_score": 4.063070900391475, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 2.41, | |
| "learning_rate": 9.777227722772278e-06, | |
| "loss": 0.3695, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 2.41, | |
| "eval_loss": 0.3817928731441498, | |
| "eval_runtime": 1.7445, | |
| "eval_samples_per_second": 18.343, | |
| "eval_steps_per_second": 9.172, | |
| "eval_wer_score": 4.0665506742061766, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 2.44, | |
| "learning_rate": 9.261551155115513e-06, | |
| "loss": 0.3678, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 2.44, | |
| "eval_loss": 0.38285502791404724, | |
| "eval_runtime": 1.7607, | |
| "eval_samples_per_second": 18.174, | |
| "eval_steps_per_second": 9.087, | |
| "eval_wer_score": 4.053936494127882, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "learning_rate": 8.745874587458746e-06, | |
| "loss": 0.3679, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "eval_loss": 0.38176512718200684, | |
| "eval_runtime": 1.731, | |
| "eval_samples_per_second": 18.487, | |
| "eval_steps_per_second": 9.243, | |
| "eval_wer_score": 4.0939538929969554, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 2.51, | |
| "learning_rate": 8.230198019801981e-06, | |
| "loss": 0.3692, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 2.51, | |
| "eval_loss": 0.3833461403846741, | |
| "eval_runtime": 1.7399, | |
| "eval_samples_per_second": 18.392, | |
| "eval_steps_per_second": 9.196, | |
| "eval_wer_score": 4.0856894301870375, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 2.54, | |
| "learning_rate": 7.714521452145216e-06, | |
| "loss": 0.3691, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 2.54, | |
| "eval_loss": 0.381513774394989, | |
| "eval_runtime": 1.7336, | |
| "eval_samples_per_second": 18.458, | |
| "eval_steps_per_second": 9.229, | |
| "eval_wer_score": 4.061765985210961, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 2.57, | |
| "learning_rate": 7.198844884488449e-06, | |
| "loss": 0.3674, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 2.57, | |
| "eval_loss": 0.38187551498413086, | |
| "eval_runtime": 1.7502, | |
| "eval_samples_per_second": 18.284, | |
| "eval_steps_per_second": 9.142, | |
| "eval_wer_score": 4.069595476294041, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "learning_rate": 6.6831683168316835e-06, | |
| "loss": 0.3675, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "eval_loss": 0.38118019700050354, | |
| "eval_runtime": 1.7491, | |
| "eval_samples_per_second": 18.295, | |
| "eval_steps_per_second": 9.147, | |
| "eval_wer_score": 4.0491518051326665, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 2.63, | |
| "learning_rate": 6.167491749174918e-06, | |
| "loss": 0.3683, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 2.63, | |
| "eval_loss": 0.38107800483703613, | |
| "eval_runtime": 1.7521, | |
| "eval_samples_per_second": 18.264, | |
| "eval_steps_per_second": 9.132, | |
| "eval_wer_score": 4.057416267942584, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "learning_rate": 5.651815181518152e-06, | |
| "loss": 0.3688, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "eval_loss": 0.3809460401535034, | |
| "eval_runtime": 1.7731, | |
| "eval_samples_per_second": 18.047, | |
| "eval_steps_per_second": 9.024, | |
| "eval_wer_score": 4.06002609830361, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 2.69, | |
| "learning_rate": 5.136138613861386e-06, | |
| "loss": 0.3661, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 2.69, | |
| "eval_loss": 0.3806704878807068, | |
| "eval_runtime": 1.7466, | |
| "eval_samples_per_second": 18.321, | |
| "eval_steps_per_second": 9.16, | |
| "eval_wer_score": 4.070030448020878, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "learning_rate": 4.62046204620462e-06, | |
| "loss": 0.3685, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "eval_loss": 0.3806542456150055, | |
| "eval_runtime": 1.7323, | |
| "eval_samples_per_second": 18.473, | |
| "eval_steps_per_second": 9.236, | |
| "eval_wer_score": 4.0400173988690735, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 2.75, | |
| "learning_rate": 4.104785478547855e-06, | |
| "loss": 0.3665, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 2.75, | |
| "eval_loss": 0.3803941607475281, | |
| "eval_runtime": 1.8056, | |
| "eval_samples_per_second": 17.723, | |
| "eval_steps_per_second": 8.861, | |
| "eval_wer_score": 4.07177033492823, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 2.78, | |
| "learning_rate": 3.589108910891089e-06, | |
| "loss": 0.3665, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 2.78, | |
| "eval_loss": 0.38016611337661743, | |
| "eval_runtime": 1.7258, | |
| "eval_samples_per_second": 18.542, | |
| "eval_steps_per_second": 9.271, | |
| "eval_wer_score": 4.04654197477164, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "learning_rate": 3.073432343234324e-06, | |
| "loss": 0.3651, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "eval_loss": 0.38055145740509033, | |
| "eval_runtime": 1.7219, | |
| "eval_samples_per_second": 18.584, | |
| "eval_steps_per_second": 9.292, | |
| "eval_wer_score": 4.047411918225316, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "learning_rate": 2.557755775577558e-06, | |
| "loss": 0.3658, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "eval_loss": 0.3797268867492676, | |
| "eval_runtime": 1.7777, | |
| "eval_samples_per_second": 18.0, | |
| "eval_steps_per_second": 9.0, | |
| "eval_wer_score": 4.038712483688561, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "learning_rate": 2.042079207920792e-06, | |
| "loss": 0.3661, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "eval_loss": 0.37951183319091797, | |
| "eval_runtime": 1.7219, | |
| "eval_samples_per_second": 18.584, | |
| "eval_steps_per_second": 9.292, | |
| "eval_wer_score": 4.044367116137451, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "learning_rate": 1.5264026402640265e-06, | |
| "loss": 0.3679, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "eval_loss": 0.37964126467704773, | |
| "eval_runtime": 1.7634, | |
| "eval_samples_per_second": 18.146, | |
| "eval_steps_per_second": 9.073, | |
| "eval_wer_score": 4.043062200956938, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 2.94, | |
| "learning_rate": 1.0107260726072606e-06, | |
| "loss": 0.3667, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 2.94, | |
| "eval_loss": 0.37969282269477844, | |
| "eval_runtime": 1.7225, | |
| "eval_samples_per_second": 18.577, | |
| "eval_steps_per_second": 9.289, | |
| "eval_wer_score": 4.039582427142236, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 2.97, | |
| "learning_rate": 4.950495049504951e-07, | |
| "loss": 0.3671, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 2.97, | |
| "eval_loss": 0.3793087899684906, | |
| "eval_runtime": 1.7729, | |
| "eval_samples_per_second": 18.049, | |
| "eval_steps_per_second": 9.025, | |
| "eval_wer_score": 4.044802087864289, | |
| "step": 4800 | |
| } | |
| ], | |
| "max_steps": 4848, | |
| "num_train_epochs": 3, | |
| "total_flos": 1.39557801614635e+18, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |