| { | |
| "best_metric": 0.9965786507808726, | |
| "best_model_checkpoint": "/home/cloudwalker/ASVmodel/wavlm-base_2/checkpoint-300", | |
| "epoch": 49.9054820415879, | |
| "eval_steps": 100, | |
| "global_step": 19800, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 1.515151515151515e-05, | |
| "loss": 0.4872, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.21796834468841553, | |
| "eval_runtime": 498.2273, | |
| "eval_samples_per_second": 49.865, | |
| "eval_steps_per_second": 24.932, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 3.03030303030303e-05, | |
| "loss": 0.1571, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "eval_accuracy": 0.93342456931251, | |
| "eval_loss": 0.2581726014614105, | |
| "eval_runtime": 500.3876, | |
| "eval_samples_per_second": 49.65, | |
| "eval_steps_per_second": 24.825, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 4.545454545454545e-05, | |
| "loss": 0.0644, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "eval_accuracy": 0.9965786507808726, | |
| "eval_loss": 0.024423159658908844, | |
| "eval_runtime": 500.4785, | |
| "eval_samples_per_second": 49.64, | |
| "eval_steps_per_second": 24.82, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 6.06060606060606e-05, | |
| "loss": 0.0553, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "eval_accuracy": 0.9927950410561907, | |
| "eval_loss": 0.11555636674165726, | |
| "eval_runtime": 500.405, | |
| "eval_samples_per_second": 49.648, | |
| "eval_steps_per_second": 24.824, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 7.575757575757576e-05, | |
| "loss": 0.1108, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "eval_accuracy": 0.9898164546771856, | |
| "eval_loss": 0.1576482504606247, | |
| "eval_runtime": 500.3246, | |
| "eval_samples_per_second": 49.656, | |
| "eval_steps_per_second": 24.828, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "learning_rate": 9.09090909090909e-05, | |
| "loss": 0.0849, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "eval_accuracy": 0.9946868459185316, | |
| "eval_loss": 0.08708283305168152, | |
| "eval_runtime": 500.3759, | |
| "eval_samples_per_second": 49.651, | |
| "eval_steps_per_second": 24.825, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "learning_rate": 0.00010606060606060605, | |
| "loss": 0.0635, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "eval_accuracy": 0.9938818225728546, | |
| "eval_loss": 0.10875184088945389, | |
| "eval_runtime": 500.3093, | |
| "eval_samples_per_second": 49.657, | |
| "eval_steps_per_second": 24.829, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "learning_rate": 0.0001212121212121212, | |
| "loss": 0.0504, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "eval_accuracy": 0.9789888906778297, | |
| "eval_loss": 0.4074054956436157, | |
| "eval_runtime": 500.4204, | |
| "eval_samples_per_second": 49.646, | |
| "eval_steps_per_second": 24.823, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 2.27, | |
| "learning_rate": 0.00013636363636363634, | |
| "loss": 0.1075, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 2.27, | |
| "eval_accuracy": 0.9814442118821446, | |
| "eval_loss": 0.2954882085323334, | |
| "eval_runtime": 500.4501, | |
| "eval_samples_per_second": 49.643, | |
| "eval_steps_per_second": 24.822, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "learning_rate": 0.00015151515151515152, | |
| "loss": 0.2387, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "eval_accuracy": 0.9956126227660602, | |
| "eval_loss": 0.06512398272752762, | |
| "eval_runtime": 500.7096, | |
| "eval_samples_per_second": 49.618, | |
| "eval_steps_per_second": 24.809, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 2.77, | |
| "learning_rate": 0.00016666666666666666, | |
| "loss": 0.3052, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 2.77, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.23793257772922516, | |
| "eval_runtime": 500.5364, | |
| "eval_samples_per_second": 49.635, | |
| "eval_steps_per_second": 24.817, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 3.02, | |
| "learning_rate": 0.0001818181818181818, | |
| "loss": 0.3336, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 3.02, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.35269346833229065, | |
| "eval_runtime": 501.0064, | |
| "eval_samples_per_second": 49.588, | |
| "eval_steps_per_second": 24.794, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 3.28, | |
| "learning_rate": 0.00019696969696969695, | |
| "loss": 0.3322, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 3.28, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.33067458868026733, | |
| "eval_runtime": 502.8336, | |
| "eval_samples_per_second": 49.408, | |
| "eval_steps_per_second": 24.704, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 3.53, | |
| "learning_rate": 0.0002121212121212121, | |
| "loss": 0.3201, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 3.53, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.34046611189842224, | |
| "eval_runtime": 500.7806, | |
| "eval_samples_per_second": 49.611, | |
| "eval_steps_per_second": 24.805, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 3.78, | |
| "learning_rate": 0.00022727272727272725, | |
| "loss": 0.3406, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 3.78, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.33353403210639954, | |
| "eval_runtime": 500.7499, | |
| "eval_samples_per_second": 49.614, | |
| "eval_steps_per_second": 24.807, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 4.03, | |
| "learning_rate": 0.0002424242424242424, | |
| "loss": 0.3475, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 4.03, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.3340916931629181, | |
| "eval_runtime": 500.598, | |
| "eval_samples_per_second": 49.629, | |
| "eval_steps_per_second": 24.814, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 4.28, | |
| "learning_rate": 0.00025757575757575756, | |
| "loss": 0.3312, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 4.28, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.33610448241233826, | |
| "eval_runtime": 500.8798, | |
| "eval_samples_per_second": 49.601, | |
| "eval_steps_per_second": 24.8, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 4.54, | |
| "learning_rate": 0.0002727272727272727, | |
| "loss": 0.3367, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 4.54, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.3309839069843292, | |
| "eval_runtime": 500.5768, | |
| "eval_samples_per_second": 49.631, | |
| "eval_steps_per_second": 24.815, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 4.79, | |
| "learning_rate": 0.00028787878787878786, | |
| "loss": 0.3284, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 4.79, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.33385568857192993, | |
| "eval_runtime": 500.453, | |
| "eval_samples_per_second": 49.643, | |
| "eval_steps_per_second": 24.822, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 5.04, | |
| "learning_rate": 0.00029966329966329963, | |
| "loss": 0.3267, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 5.04, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.3350389301776886, | |
| "eval_runtime": 500.7512, | |
| "eval_samples_per_second": 49.613, | |
| "eval_steps_per_second": 24.807, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 5.29, | |
| "learning_rate": 0.00029797979797979794, | |
| "loss": 0.338, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 5.29, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.33081698417663574, | |
| "eval_runtime": 500.5738, | |
| "eval_samples_per_second": 49.631, | |
| "eval_steps_per_second": 24.816, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 5.55, | |
| "learning_rate": 0.00029629629629629624, | |
| "loss": 0.3277, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 5.55, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.330895334482193, | |
| "eval_runtime": 500.5595, | |
| "eval_samples_per_second": 49.632, | |
| "eval_steps_per_second": 24.816, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 5.8, | |
| "learning_rate": 0.0002946127946127946, | |
| "loss": 0.3294, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 5.8, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.3312866687774658, | |
| "eval_runtime": 500.6862, | |
| "eval_samples_per_second": 49.62, | |
| "eval_steps_per_second": 24.81, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 6.05, | |
| "learning_rate": 0.0002929292929292929, | |
| "loss": 0.3315, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 6.05, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.33599570393562317, | |
| "eval_runtime": 500.6176, | |
| "eval_samples_per_second": 49.627, | |
| "eval_steps_per_second": 24.813, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 6.3, | |
| "learning_rate": 0.00029124579124579125, | |
| "loss": 0.3397, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 6.3, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.33067843317985535, | |
| "eval_runtime": 500.7157, | |
| "eval_samples_per_second": 49.617, | |
| "eval_steps_per_second": 24.808, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 6.55, | |
| "learning_rate": 0.00028956228956228955, | |
| "loss": 0.3318, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 6.55, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.3359489440917969, | |
| "eval_runtime": 500.805, | |
| "eval_samples_per_second": 49.608, | |
| "eval_steps_per_second": 24.804, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 6.81, | |
| "learning_rate": 0.00028787878787878786, | |
| "loss": 0.3312, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 6.81, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.3307555615901947, | |
| "eval_runtime": 500.6263, | |
| "eval_samples_per_second": 49.626, | |
| "eval_steps_per_second": 24.813, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 7.06, | |
| "learning_rate": 0.00028619528619528616, | |
| "loss": 0.3155, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 7.06, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.33172452449798584, | |
| "eval_runtime": 500.7242, | |
| "eval_samples_per_second": 49.616, | |
| "eval_steps_per_second": 24.808, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 7.31, | |
| "learning_rate": 0.0002845117845117845, | |
| "loss": 0.3304, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 7.31, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.33618223667144775, | |
| "eval_runtime": 500.5229, | |
| "eval_samples_per_second": 49.636, | |
| "eval_steps_per_second": 24.818, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 7.56, | |
| "learning_rate": 0.0002828282828282828, | |
| "loss": 0.338, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 7.56, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.3341616988182068, | |
| "eval_runtime": 500.7918, | |
| "eval_samples_per_second": 49.609, | |
| "eval_steps_per_second": 24.805, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 7.81, | |
| "learning_rate": 0.0002811447811447811, | |
| "loss": 0.3241, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 7.81, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.330985426902771, | |
| "eval_runtime": 500.924, | |
| "eval_samples_per_second": 49.596, | |
| "eval_steps_per_second": 24.798, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 8.07, | |
| "learning_rate": 0.0002794612794612794, | |
| "loss": 0.3325, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 8.07, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.33258649706840515, | |
| "eval_runtime": 500.7775, | |
| "eval_samples_per_second": 49.611, | |
| "eval_steps_per_second": 24.805, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 8.32, | |
| "learning_rate": 0.0002777777777777778, | |
| "loss": 0.3202, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 8.32, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.3345409035682678, | |
| "eval_runtime": 501.0057, | |
| "eval_samples_per_second": 49.588, | |
| "eval_steps_per_second": 24.794, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 8.57, | |
| "learning_rate": 0.0002760942760942761, | |
| "loss": 0.3315, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 8.57, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.3335334360599518, | |
| "eval_runtime": 500.8188, | |
| "eval_samples_per_second": 49.607, | |
| "eval_steps_per_second": 24.803, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 8.82, | |
| "learning_rate": 0.0002744107744107744, | |
| "loss": 0.3288, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 8.82, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.33116644620895386, | |
| "eval_runtime": 501.0568, | |
| "eval_samples_per_second": 49.583, | |
| "eval_steps_per_second": 24.792, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 9.07, | |
| "learning_rate": 0.0002727272727272727, | |
| "loss": 0.3371, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 9.07, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.34014323353767395, | |
| "eval_runtime": 500.7918, | |
| "eval_samples_per_second": 49.609, | |
| "eval_steps_per_second": 24.805, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 9.33, | |
| "learning_rate": 0.00027104377104377104, | |
| "loss": 0.3409, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 9.33, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.33300086855888367, | |
| "eval_runtime": 500.8108, | |
| "eval_samples_per_second": 49.608, | |
| "eval_steps_per_second": 24.804, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 9.58, | |
| "learning_rate": 0.00026936026936026934, | |
| "loss": 0.3236, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 9.58, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.3329709768295288, | |
| "eval_runtime": 500.7254, | |
| "eval_samples_per_second": 49.616, | |
| "eval_steps_per_second": 24.808, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 9.83, | |
| "learning_rate": 0.00026767676767676764, | |
| "loss": 0.3224, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 9.83, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.3320678770542145, | |
| "eval_runtime": 500.9946, | |
| "eval_samples_per_second": 49.589, | |
| "eval_steps_per_second": 24.795, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 10.08, | |
| "learning_rate": 0.00026599326599326595, | |
| "loss": 0.3439, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 10.08, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.33262181282043457, | |
| "eval_runtime": 501.0278, | |
| "eval_samples_per_second": 49.586, | |
| "eval_steps_per_second": 24.793, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 10.33, | |
| "learning_rate": 0.0002643097643097643, | |
| "loss": 0.3382, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 10.33, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.3310418426990509, | |
| "eval_runtime": 501.0043, | |
| "eval_samples_per_second": 49.588, | |
| "eval_steps_per_second": 24.794, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 10.59, | |
| "learning_rate": 0.0002626262626262626, | |
| "loss": 0.3307, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 10.59, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.33820316195487976, | |
| "eval_runtime": 501.0192, | |
| "eval_samples_per_second": 49.587, | |
| "eval_steps_per_second": 24.793, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 10.84, | |
| "learning_rate": 0.0002609427609427609, | |
| "loss": 0.3231, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 10.84, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.3324846625328064, | |
| "eval_runtime": 500.6054, | |
| "eval_samples_per_second": 49.628, | |
| "eval_steps_per_second": 24.814, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 11.09, | |
| "learning_rate": 0.0002592592592592592, | |
| "loss": 0.3095, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 11.09, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.3348234295845032, | |
| "eval_runtime": 500.6559, | |
| "eval_samples_per_second": 49.623, | |
| "eval_steps_per_second": 24.811, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 11.34, | |
| "learning_rate": 0.00025757575757575756, | |
| "loss": 0.3442, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 11.34, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.33274412155151367, | |
| "eval_runtime": 501.0421, | |
| "eval_samples_per_second": 49.585, | |
| "eval_steps_per_second": 24.792, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 11.59, | |
| "learning_rate": 0.00025589225589225587, | |
| "loss": 0.3269, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 11.59, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.33261528611183167, | |
| "eval_runtime": 500.8659, | |
| "eval_samples_per_second": 49.602, | |
| "eval_steps_per_second": 24.801, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 11.85, | |
| "learning_rate": 0.00025420875420875417, | |
| "loss": 0.3323, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 11.85, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.3308444619178772, | |
| "eval_runtime": 501.0609, | |
| "eval_samples_per_second": 49.583, | |
| "eval_steps_per_second": 24.791, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 12.1, | |
| "learning_rate": 0.0002525252525252525, | |
| "loss": 0.3313, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 12.1, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.3308044970035553, | |
| "eval_runtime": 500.8061, | |
| "eval_samples_per_second": 49.608, | |
| "eval_steps_per_second": 24.804, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 12.35, | |
| "learning_rate": 0.0002508417508417508, | |
| "loss": 0.3283, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 12.35, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.3314284384250641, | |
| "eval_runtime": 500.7554, | |
| "eval_samples_per_second": 49.613, | |
| "eval_steps_per_second": 24.807, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 12.6, | |
| "learning_rate": 0.00024915824915824913, | |
| "loss": 0.3331, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 12.6, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.3306741416454315, | |
| "eval_runtime": 500.9128, | |
| "eval_samples_per_second": 49.597, | |
| "eval_steps_per_second": 24.799, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 12.85, | |
| "learning_rate": 0.0002474747474747475, | |
| "loss": 0.3317, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 12.85, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.3343793749809265, | |
| "eval_runtime": 500.7387, | |
| "eval_samples_per_second": 49.615, | |
| "eval_steps_per_second": 24.807, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 13.11, | |
| "learning_rate": 0.0002457912457912458, | |
| "loss": 0.3283, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 13.11, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.33199575543403625, | |
| "eval_runtime": 500.8527, | |
| "eval_samples_per_second": 49.603, | |
| "eval_steps_per_second": 24.802, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 13.36, | |
| "learning_rate": 0.00024410774410774406, | |
| "loss": 0.3263, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 13.36, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.33114317059516907, | |
| "eval_runtime": 500.8605, | |
| "eval_samples_per_second": 49.603, | |
| "eval_steps_per_second": 24.801, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 13.61, | |
| "learning_rate": 0.0002424242424242424, | |
| "loss": 0.3421, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 13.61, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.3306863009929657, | |
| "eval_runtime": 500.9253, | |
| "eval_samples_per_second": 49.596, | |
| "eval_steps_per_second": 24.798, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 13.86, | |
| "learning_rate": 0.00024074074074074072, | |
| "loss": 0.3164, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 13.86, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.3318192958831787, | |
| "eval_runtime": 500.4762, | |
| "eval_samples_per_second": 49.641, | |
| "eval_steps_per_second": 24.82, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 14.11, | |
| "learning_rate": 0.00023905723905723905, | |
| "loss": 0.3315, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 14.11, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.3335217535495758, | |
| "eval_runtime": 500.848, | |
| "eval_samples_per_second": 49.604, | |
| "eval_steps_per_second": 24.802, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 14.37, | |
| "learning_rate": 0.00023737373737373732, | |
| "loss": 0.3415, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 14.37, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.3314845860004425, | |
| "eval_runtime": 500.8633, | |
| "eval_samples_per_second": 49.602, | |
| "eval_steps_per_second": 24.801, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 14.62, | |
| "learning_rate": 0.00023569023569023565, | |
| "loss": 0.3325, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 14.62, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.33067190647125244, | |
| "eval_runtime": 500.8182, | |
| "eval_samples_per_second": 49.607, | |
| "eval_steps_per_second": 24.803, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 14.87, | |
| "learning_rate": 0.00023400673400673398, | |
| "loss": 0.3264, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 14.87, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.33295896649360657, | |
| "eval_runtime": 500.9198, | |
| "eval_samples_per_second": 49.597, | |
| "eval_steps_per_second": 24.798, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 15.12, | |
| "learning_rate": 0.0002323232323232323, | |
| "loss": 0.3223, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 15.12, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.3306836187839508, | |
| "eval_runtime": 500.9444, | |
| "eval_samples_per_second": 49.594, | |
| "eval_steps_per_second": 24.797, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 15.37, | |
| "learning_rate": 0.00023063973063973064, | |
| "loss": 0.3289, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 15.37, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.3328978717327118, | |
| "eval_runtime": 500.8768, | |
| "eval_samples_per_second": 49.601, | |
| "eval_steps_per_second": 24.801, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 15.63, | |
| "learning_rate": 0.00022895622895622892, | |
| "loss": 0.3353, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 15.63, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.33112406730651855, | |
| "eval_runtime": 500.9115, | |
| "eval_samples_per_second": 49.598, | |
| "eval_steps_per_second": 24.799, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 15.88, | |
| "learning_rate": 0.00022727272727272725, | |
| "loss": 0.3246, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 15.88, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.3310993015766144, | |
| "eval_runtime": 501.1149, | |
| "eval_samples_per_second": 49.577, | |
| "eval_steps_per_second": 24.789, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 16.13, | |
| "learning_rate": 0.00022558922558922557, | |
| "loss": 0.3425, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 16.13, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.330674409866333, | |
| "eval_runtime": 500.85, | |
| "eval_samples_per_second": 49.604, | |
| "eval_steps_per_second": 24.802, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 16.38, | |
| "learning_rate": 0.0002239057239057239, | |
| "loss": 0.331, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 16.38, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.3306954503059387, | |
| "eval_runtime": 501.1853, | |
| "eval_samples_per_second": 49.57, | |
| "eval_steps_per_second": 24.785, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 16.64, | |
| "learning_rate": 0.00022222222222222218, | |
| "loss": 0.3293, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 16.64, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.33531516790390015, | |
| "eval_runtime": 500.9658, | |
| "eval_samples_per_second": 49.592, | |
| "eval_steps_per_second": 24.796, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 16.89, | |
| "learning_rate": 0.0002205387205387205, | |
| "loss": 0.3249, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 16.89, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.3339368402957916, | |
| "eval_runtime": 500.8138, | |
| "eval_samples_per_second": 49.607, | |
| "eval_steps_per_second": 24.804, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 17.14, | |
| "learning_rate": 0.00021885521885521884, | |
| "loss": 0.3214, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 17.14, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.3337734639644623, | |
| "eval_runtime": 500.9586, | |
| "eval_samples_per_second": 49.593, | |
| "eval_steps_per_second": 24.796, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 17.39, | |
| "learning_rate": 0.00021717171717171717, | |
| "loss": 0.3259, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 17.39, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.3327140212059021, | |
| "eval_runtime": 501.0122, | |
| "eval_samples_per_second": 49.588, | |
| "eval_steps_per_second": 24.794, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 17.64, | |
| "learning_rate": 0.00021548821548821544, | |
| "loss": 0.3408, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 17.64, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.33182209730148315, | |
| "eval_runtime": 500.9279, | |
| "eval_samples_per_second": 49.596, | |
| "eval_steps_per_second": 24.798, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 17.9, | |
| "learning_rate": 0.00021380471380471377, | |
| "loss": 0.3258, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 17.9, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.33183717727661133, | |
| "eval_runtime": 501.2678, | |
| "eval_samples_per_second": 49.562, | |
| "eval_steps_per_second": 24.781, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 18.15, | |
| "learning_rate": 0.0002121212121212121, | |
| "loss": 0.3299, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 18.15, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.33080849051475525, | |
| "eval_runtime": 501.2555, | |
| "eval_samples_per_second": 49.564, | |
| "eval_steps_per_second": 24.782, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 18.4, | |
| "learning_rate": 0.00021043771043771043, | |
| "loss": 0.327, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 18.4, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.3371123671531677, | |
| "eval_runtime": 500.9892, | |
| "eval_samples_per_second": 49.59, | |
| "eval_steps_per_second": 24.795, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 18.65, | |
| "learning_rate": 0.00020875420875420876, | |
| "loss": 0.3317, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 18.65, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.3307543694972992, | |
| "eval_runtime": 500.9046, | |
| "eval_samples_per_second": 49.598, | |
| "eval_steps_per_second": 24.799, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 18.9, | |
| "learning_rate": 0.00020707070707070703, | |
| "loss": 0.3291, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 18.9, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.33100253343582153, | |
| "eval_runtime": 500.936, | |
| "eval_samples_per_second": 49.595, | |
| "eval_steps_per_second": 24.798, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 19.16, | |
| "learning_rate": 0.00020538720538720536, | |
| "loss": 0.3263, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 19.16, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.33248230814933777, | |
| "eval_runtime": 500.8588, | |
| "eval_samples_per_second": 49.603, | |
| "eval_steps_per_second": 24.801, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 19.41, | |
| "learning_rate": 0.0002037037037037037, | |
| "loss": 0.3223, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 19.41, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.33463332056999207, | |
| "eval_runtime": 500.8344, | |
| "eval_samples_per_second": 49.605, | |
| "eval_steps_per_second": 24.803, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 19.66, | |
| "learning_rate": 0.00020202020202020202, | |
| "loss": 0.3403, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 19.66, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.3315812647342682, | |
| "eval_runtime": 500.8929, | |
| "eval_samples_per_second": 49.599, | |
| "eval_steps_per_second": 24.8, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 19.91, | |
| "learning_rate": 0.0002003367003367003, | |
| "loss": 0.3265, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 19.91, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.3309071660041809, | |
| "eval_runtime": 500.8065, | |
| "eval_samples_per_second": 49.608, | |
| "eval_steps_per_second": 24.804, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 20.16, | |
| "learning_rate": 0.00019865319865319862, | |
| "loss": 0.33, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 20.16, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.3318468928337097, | |
| "eval_runtime": 501.0869, | |
| "eval_samples_per_second": 49.58, | |
| "eval_steps_per_second": 24.79, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 20.42, | |
| "learning_rate": 0.00019696969696969695, | |
| "loss": 0.3488, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 20.42, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.33127933740615845, | |
| "eval_runtime": 500.8956, | |
| "eval_samples_per_second": 49.599, | |
| "eval_steps_per_second": 24.8, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 20.67, | |
| "learning_rate": 0.00019528619528619528, | |
| "loss": 0.3293, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 20.67, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.33354687690734863, | |
| "eval_runtime": 501.2059, | |
| "eval_samples_per_second": 49.568, | |
| "eval_steps_per_second": 24.784, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 20.92, | |
| "learning_rate": 0.00019360269360269356, | |
| "loss": 0.3095, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 20.92, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.33559244871139526, | |
| "eval_runtime": 501.1442, | |
| "eval_samples_per_second": 49.575, | |
| "eval_steps_per_second": 24.787, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 21.17, | |
| "learning_rate": 0.0001919191919191919, | |
| "loss": 0.3366, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 21.17, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.3331533968448639, | |
| "eval_runtime": 501.0445, | |
| "eval_samples_per_second": 49.584, | |
| "eval_steps_per_second": 24.792, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 21.42, | |
| "learning_rate": 0.00019023569023569022, | |
| "loss": 0.317, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 21.42, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.3337852358818054, | |
| "eval_runtime": 501.2024, | |
| "eval_samples_per_second": 49.569, | |
| "eval_steps_per_second": 24.784, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 21.68, | |
| "learning_rate": 0.00018855218855218854, | |
| "loss": 0.3299, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 21.68, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.3308347165584564, | |
| "eval_runtime": 501.0, | |
| "eval_samples_per_second": 49.589, | |
| "eval_steps_per_second": 24.794, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 21.93, | |
| "learning_rate": 0.00018686868686868687, | |
| "loss": 0.3434, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 21.93, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.330965131521225, | |
| "eval_runtime": 501.1751, | |
| "eval_samples_per_second": 49.571, | |
| "eval_steps_per_second": 24.786, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 22.18, | |
| "learning_rate": 0.00018518518518518515, | |
| "loss": 0.3208, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 22.18, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.3308819830417633, | |
| "eval_runtime": 501.189, | |
| "eval_samples_per_second": 49.57, | |
| "eval_steps_per_second": 24.785, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 22.43, | |
| "learning_rate": 0.00018350168350168348, | |
| "loss": 0.3351, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 22.43, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.33238929510116577, | |
| "eval_runtime": 501.2428, | |
| "eval_samples_per_second": 49.565, | |
| "eval_steps_per_second": 24.782, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 22.68, | |
| "learning_rate": 0.0001818181818181818, | |
| "loss": 0.3301, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 22.68, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.3308260440826416, | |
| "eval_runtime": 500.8427, | |
| "eval_samples_per_second": 49.604, | |
| "eval_steps_per_second": 24.802, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 22.94, | |
| "learning_rate": 0.00018013468013468014, | |
| "loss": 0.3196, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 22.94, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.3329552710056305, | |
| "eval_runtime": 501.1009, | |
| "eval_samples_per_second": 49.579, | |
| "eval_steps_per_second": 24.789, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 23.19, | |
| "learning_rate": 0.0001784511784511784, | |
| "loss": 0.3339, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 23.19, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.33333924412727356, | |
| "eval_runtime": 500.8988, | |
| "eval_samples_per_second": 49.599, | |
| "eval_steps_per_second": 24.799, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 23.44, | |
| "learning_rate": 0.00017676767676767674, | |
| "loss": 0.3249, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 23.44, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.3307563364505768, | |
| "eval_runtime": 501.0394, | |
| "eval_samples_per_second": 49.585, | |
| "eval_steps_per_second": 24.792, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 23.69, | |
| "learning_rate": 0.00017508417508417507, | |
| "loss": 0.3247, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 23.69, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.3338105082511902, | |
| "eval_runtime": 500.7662, | |
| "eval_samples_per_second": 49.612, | |
| "eval_steps_per_second": 24.806, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 23.94, | |
| "learning_rate": 0.0001734006734006734, | |
| "loss": 0.3369, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 23.94, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.3312574028968811, | |
| "eval_runtime": 501.3028, | |
| "eval_samples_per_second": 49.559, | |
| "eval_steps_per_second": 24.779, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 24.2, | |
| "learning_rate": 0.00017171717171717167, | |
| "loss": 0.3291, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 24.2, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.3320288062095642, | |
| "eval_runtime": 501.3419, | |
| "eval_samples_per_second": 49.555, | |
| "eval_steps_per_second": 24.777, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 24.45, | |
| "learning_rate": 0.00017003367003367, | |
| "loss": 0.3307, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 24.45, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.33087393641471863, | |
| "eval_runtime": 501.1809, | |
| "eval_samples_per_second": 49.571, | |
| "eval_steps_per_second": 24.785, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 24.7, | |
| "learning_rate": 0.00016835016835016833, | |
| "loss": 0.3328, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 24.7, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.33073562383651733, | |
| "eval_runtime": 500.8708, | |
| "eval_samples_per_second": 49.602, | |
| "eval_steps_per_second": 24.801, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 24.95, | |
| "learning_rate": 0.00016666666666666666, | |
| "loss": 0.3277, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 24.95, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.3341946601867676, | |
| "eval_runtime": 501.1287, | |
| "eval_samples_per_second": 49.576, | |
| "eval_steps_per_second": 24.788, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 25.2, | |
| "learning_rate": 0.000164983164983165, | |
| "loss": 0.3278, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 25.2, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.3309917449951172, | |
| "eval_runtime": 500.9142, | |
| "eval_samples_per_second": 49.597, | |
| "eval_steps_per_second": 24.799, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 25.46, | |
| "learning_rate": 0.00016329966329966327, | |
| "loss": 0.3197, | |
| "step": 10100 | |
| }, | |
| { | |
| "epoch": 25.46, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.3348841965198517, | |
| "eval_runtime": 501.1589, | |
| "eval_samples_per_second": 49.573, | |
| "eval_steps_per_second": 24.787, | |
| "step": 10100 | |
| }, | |
| { | |
| "epoch": 25.71, | |
| "learning_rate": 0.0001616161616161616, | |
| "loss": 0.3273, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 25.71, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.3321140706539154, | |
| "eval_runtime": 501.4344, | |
| "eval_samples_per_second": 49.546, | |
| "eval_steps_per_second": 24.773, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 25.96, | |
| "learning_rate": 0.00015993265993265992, | |
| "loss": 0.3345, | |
| "step": 10300 | |
| }, | |
| { | |
| "epoch": 25.96, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.3312225043773651, | |
| "eval_runtime": 500.9045, | |
| "eval_samples_per_second": 49.598, | |
| "eval_steps_per_second": 24.799, | |
| "step": 10300 | |
| }, | |
| { | |
| "epoch": 26.21, | |
| "learning_rate": 0.00015824915824915825, | |
| "loss": 0.3351, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 26.21, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.33248284459114075, | |
| "eval_runtime": 501.2052, | |
| "eval_samples_per_second": 49.569, | |
| "eval_steps_per_second": 24.784, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 26.47, | |
| "learning_rate": 0.00015656565656565653, | |
| "loss": 0.3144, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 26.47, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.334583580493927, | |
| "eval_runtime": 501.103, | |
| "eval_samples_per_second": 49.579, | |
| "eval_steps_per_second": 24.789, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 26.72, | |
| "learning_rate": 0.00015488215488215486, | |
| "loss": 0.3361, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 26.72, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.33112606406211853, | |
| "eval_runtime": 500.6859, | |
| "eval_samples_per_second": 49.62, | |
| "eval_steps_per_second": 24.81, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 26.97, | |
| "learning_rate": 0.00015319865319865319, | |
| "loss": 0.3334, | |
| "step": 10700 | |
| }, | |
| { | |
| "epoch": 26.97, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.3306848406791687, | |
| "eval_runtime": 500.81, | |
| "eval_samples_per_second": 49.608, | |
| "eval_steps_per_second": 24.804, | |
| "step": 10700 | |
| }, | |
| { | |
| "epoch": 27.22, | |
| "learning_rate": 0.00015151515151515152, | |
| "loss": 0.3287, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 27.22, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.3373050093650818, | |
| "eval_runtime": 501.0133, | |
| "eval_samples_per_second": 49.588, | |
| "eval_steps_per_second": 24.794, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 27.47, | |
| "learning_rate": 0.00014983164983164982, | |
| "loss": 0.3374, | |
| "step": 10900 | |
| }, | |
| { | |
| "epoch": 27.47, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.3306737244129181, | |
| "eval_runtime": 501.4272, | |
| "eval_samples_per_second": 49.547, | |
| "eval_steps_per_second": 24.773, | |
| "step": 10900 | |
| }, | |
| { | |
| "epoch": 27.73, | |
| "learning_rate": 0.00014814814814814812, | |
| "loss": 0.3302, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 27.73, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.3306819200515747, | |
| "eval_runtime": 501.0901, | |
| "eval_samples_per_second": 49.58, | |
| "eval_steps_per_second": 24.79, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 27.98, | |
| "learning_rate": 0.00014646464646464645, | |
| "loss": 0.3245, | |
| "step": 11100 | |
| }, | |
| { | |
| "epoch": 27.98, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.33153215050697327, | |
| "eval_runtime": 501.2107, | |
| "eval_samples_per_second": 49.568, | |
| "eval_steps_per_second": 24.784, | |
| "step": 11100 | |
| }, | |
| { | |
| "epoch": 28.23, | |
| "learning_rate": 0.00014478114478114478, | |
| "loss": 0.3353, | |
| "step": 11200 | |
| }, | |
| { | |
| "epoch": 28.23, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.33351030945777893, | |
| "eval_runtime": 501.1769, | |
| "eval_samples_per_second": 49.571, | |
| "eval_steps_per_second": 24.786, | |
| "step": 11200 | |
| }, | |
| { | |
| "epoch": 28.48, | |
| "learning_rate": 0.00014309764309764308, | |
| "loss": 0.3191, | |
| "step": 11300 | |
| }, | |
| { | |
| "epoch": 28.48, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.33356890082359314, | |
| "eval_runtime": 501.2908, | |
| "eval_samples_per_second": 49.56, | |
| "eval_steps_per_second": 24.78, | |
| "step": 11300 | |
| }, | |
| { | |
| "epoch": 28.73, | |
| "learning_rate": 0.0001414141414141414, | |
| "loss": 0.3226, | |
| "step": 11400 | |
| }, | |
| { | |
| "epoch": 28.73, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.33081722259521484, | |
| "eval_runtime": 501.1297, | |
| "eval_samples_per_second": 49.576, | |
| "eval_steps_per_second": 24.788, | |
| "step": 11400 | |
| }, | |
| { | |
| "epoch": 28.99, | |
| "learning_rate": 0.0001397306397306397, | |
| "loss": 0.3384, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 28.99, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.3321709930896759, | |
| "eval_runtime": 501.2824, | |
| "eval_samples_per_second": 49.561, | |
| "eval_steps_per_second": 24.78, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 29.24, | |
| "learning_rate": 0.00013804713804713804, | |
| "loss": 0.3368, | |
| "step": 11600 | |
| }, | |
| { | |
| "epoch": 29.24, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.3336506485939026, | |
| "eval_runtime": 500.9493, | |
| "eval_samples_per_second": 49.594, | |
| "eval_steps_per_second": 24.797, | |
| "step": 11600 | |
| }, | |
| { | |
| "epoch": 29.49, | |
| "learning_rate": 0.00013636363636363634, | |
| "loss": 0.3224, | |
| "step": 11700 | |
| }, | |
| { | |
| "epoch": 29.49, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.3331962823867798, | |
| "eval_runtime": 501.3861, | |
| "eval_samples_per_second": 49.551, | |
| "eval_steps_per_second": 24.775, | |
| "step": 11700 | |
| }, | |
| { | |
| "epoch": 29.74, | |
| "learning_rate": 0.00013468013468013467, | |
| "loss": 0.3224, | |
| "step": 11800 | |
| }, | |
| { | |
| "epoch": 29.74, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.3318473696708679, | |
| "eval_runtime": 501.3052, | |
| "eval_samples_per_second": 49.559, | |
| "eval_steps_per_second": 24.779, | |
| "step": 11800 | |
| }, | |
| { | |
| "epoch": 29.99, | |
| "learning_rate": 0.00013299663299663297, | |
| "loss": 0.3363, | |
| "step": 11900 | |
| }, | |
| { | |
| "epoch": 29.99, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.3309670388698578, | |
| "eval_runtime": 501.4331, | |
| "eval_samples_per_second": 49.546, | |
| "eval_steps_per_second": 24.773, | |
| "step": 11900 | |
| }, | |
| { | |
| "epoch": 30.25, | |
| "learning_rate": 0.0001313131313131313, | |
| "loss": 0.327, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 30.25, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.3306976854801178, | |
| "eval_runtime": 501.4726, | |
| "eval_samples_per_second": 49.542, | |
| "eval_steps_per_second": 24.771, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 30.5, | |
| "learning_rate": 0.0001296296296296296, | |
| "loss": 0.3291, | |
| "step": 12100 | |
| }, | |
| { | |
| "epoch": 30.5, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.3306950628757477, | |
| "eval_runtime": 501.0845, | |
| "eval_samples_per_second": 49.58, | |
| "eval_steps_per_second": 24.79, | |
| "step": 12100 | |
| }, | |
| { | |
| "epoch": 30.75, | |
| "learning_rate": 0.00012794612794612793, | |
| "loss": 0.3369, | |
| "step": 12200 | |
| }, | |
| { | |
| "epoch": 30.75, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.3321588933467865, | |
| "eval_runtime": 501.0681, | |
| "eval_samples_per_second": 49.582, | |
| "eval_steps_per_second": 24.791, | |
| "step": 12200 | |
| }, | |
| { | |
| "epoch": 31.0, | |
| "learning_rate": 0.00012626262626262626, | |
| "loss": 0.3211, | |
| "step": 12300 | |
| }, | |
| { | |
| "epoch": 31.0, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.3328503370285034, | |
| "eval_runtime": 501.3433, | |
| "eval_samples_per_second": 49.555, | |
| "eval_steps_per_second": 24.777, | |
| "step": 12300 | |
| }, | |
| { | |
| "epoch": 31.25, | |
| "learning_rate": 0.00012457912457912456, | |
| "loss": 0.329, | |
| "step": 12400 | |
| }, | |
| { | |
| "epoch": 31.25, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.33213621377944946, | |
| "eval_runtime": 501.089, | |
| "eval_samples_per_second": 49.58, | |
| "eval_steps_per_second": 24.79, | |
| "step": 12400 | |
| }, | |
| { | |
| "epoch": 31.51, | |
| "learning_rate": 0.0001228956228956229, | |
| "loss": 0.3206, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 31.51, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.33089062571525574, | |
| "eval_runtime": 501.0382, | |
| "eval_samples_per_second": 49.585, | |
| "eval_steps_per_second": 24.793, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 31.76, | |
| "learning_rate": 0.0001212121212121212, | |
| "loss": 0.3339, | |
| "step": 12600 | |
| }, | |
| { | |
| "epoch": 31.76, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.3331972658634186, | |
| "eval_runtime": 501.242, | |
| "eval_samples_per_second": 49.565, | |
| "eval_steps_per_second": 24.782, | |
| "step": 12600 | |
| }, | |
| { | |
| "epoch": 32.01, | |
| "learning_rate": 0.00011952861952861952, | |
| "loss": 0.3323, | |
| "step": 12700 | |
| }, | |
| { | |
| "epoch": 32.01, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.3316064476966858, | |
| "eval_runtime": 501.0626, | |
| "eval_samples_per_second": 49.583, | |
| "eval_steps_per_second": 24.791, | |
| "step": 12700 | |
| }, | |
| { | |
| "epoch": 32.26, | |
| "learning_rate": 0.00011784511784511783, | |
| "loss": 0.3273, | |
| "step": 12800 | |
| }, | |
| { | |
| "epoch": 32.26, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.3323478400707245, | |
| "eval_runtime": 501.4098, | |
| "eval_samples_per_second": 49.548, | |
| "eval_steps_per_second": 24.774, | |
| "step": 12800 | |
| }, | |
| { | |
| "epoch": 32.51, | |
| "learning_rate": 0.00011616161616161616, | |
| "loss": 0.3362, | |
| "step": 12900 | |
| }, | |
| { | |
| "epoch": 32.51, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.33072328567504883, | |
| "eval_runtime": 501.8013, | |
| "eval_samples_per_second": 49.51, | |
| "eval_steps_per_second": 24.755, | |
| "step": 12900 | |
| }, | |
| { | |
| "epoch": 32.77, | |
| "learning_rate": 0.00011447811447811446, | |
| "loss": 0.3387, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 32.77, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.3308790922164917, | |
| "eval_runtime": 501.2768, | |
| "eval_samples_per_second": 49.561, | |
| "eval_steps_per_second": 24.781, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 33.02, | |
| "learning_rate": 0.00011279461279461279, | |
| "loss": 0.3173, | |
| "step": 13100 | |
| }, | |
| { | |
| "epoch": 33.02, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.33112308382987976, | |
| "eval_runtime": 501.2827, | |
| "eval_samples_per_second": 49.561, | |
| "eval_steps_per_second": 24.78, | |
| "step": 13100 | |
| }, | |
| { | |
| "epoch": 33.27, | |
| "learning_rate": 0.00011111111111111109, | |
| "loss": 0.3291, | |
| "step": 13200 | |
| }, | |
| { | |
| "epoch": 33.27, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.33085396885871887, | |
| "eval_runtime": 501.2449, | |
| "eval_samples_per_second": 49.565, | |
| "eval_steps_per_second": 24.782, | |
| "step": 13200 | |
| }, | |
| { | |
| "epoch": 33.52, | |
| "learning_rate": 0.00010942760942760942, | |
| "loss": 0.3316, | |
| "step": 13300 | |
| }, | |
| { | |
| "epoch": 33.52, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.33154767751693726, | |
| "eval_runtime": 501.0257, | |
| "eval_samples_per_second": 49.586, | |
| "eval_steps_per_second": 24.793, | |
| "step": 13300 | |
| }, | |
| { | |
| "epoch": 33.77, | |
| "learning_rate": 0.00010774410774410772, | |
| "loss": 0.3366, | |
| "step": 13400 | |
| }, | |
| { | |
| "epoch": 33.77, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.33317822217941284, | |
| "eval_runtime": 501.0403, | |
| "eval_samples_per_second": 49.585, | |
| "eval_steps_per_second": 24.792, | |
| "step": 13400 | |
| }, | |
| { | |
| "epoch": 34.03, | |
| "learning_rate": 0.00010606060606060605, | |
| "loss": 0.3115, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 34.03, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.3382599353790283, | |
| "eval_runtime": 500.7949, | |
| "eval_samples_per_second": 49.609, | |
| "eval_steps_per_second": 24.805, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 34.28, | |
| "learning_rate": 0.00010437710437710438, | |
| "loss": 0.3275, | |
| "step": 13600 | |
| }, | |
| { | |
| "epoch": 34.28, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.332431823015213, | |
| "eval_runtime": 500.933, | |
| "eval_samples_per_second": 49.595, | |
| "eval_steps_per_second": 24.798, | |
| "step": 13600 | |
| }, | |
| { | |
| "epoch": 34.53, | |
| "learning_rate": 0.00010269360269360268, | |
| "loss": 0.3373, | |
| "step": 13700 | |
| }, | |
| { | |
| "epoch": 34.53, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.33150389790534973, | |
| "eval_runtime": 500.781, | |
| "eval_samples_per_second": 49.611, | |
| "eval_steps_per_second": 24.805, | |
| "step": 13700 | |
| }, | |
| { | |
| "epoch": 34.78, | |
| "learning_rate": 0.00010101010101010101, | |
| "loss": 0.3247, | |
| "step": 13800 | |
| }, | |
| { | |
| "epoch": 34.78, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.3313423693180084, | |
| "eval_runtime": 501.1265, | |
| "eval_samples_per_second": 49.576, | |
| "eval_steps_per_second": 24.788, | |
| "step": 13800 | |
| }, | |
| { | |
| "epoch": 35.03, | |
| "learning_rate": 9.932659932659931e-05, | |
| "loss": 0.3349, | |
| "step": 13900 | |
| }, | |
| { | |
| "epoch": 35.03, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.33250102400779724, | |
| "eval_runtime": 501.0936, | |
| "eval_samples_per_second": 49.58, | |
| "eval_steps_per_second": 24.79, | |
| "step": 13900 | |
| }, | |
| { | |
| "epoch": 35.29, | |
| "learning_rate": 9.764309764309764e-05, | |
| "loss": 0.3223, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 35.29, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.33122116327285767, | |
| "eval_runtime": 500.9681, | |
| "eval_samples_per_second": 49.592, | |
| "eval_steps_per_second": 24.796, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 35.54, | |
| "learning_rate": 9.595959595959594e-05, | |
| "loss": 0.3321, | |
| "step": 14100 | |
| }, | |
| { | |
| "epoch": 35.54, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.3307776153087616, | |
| "eval_runtime": 500.9865, | |
| "eval_samples_per_second": 49.59, | |
| "eval_steps_per_second": 24.795, | |
| "step": 14100 | |
| }, | |
| { | |
| "epoch": 35.79, | |
| "learning_rate": 9.427609427609427e-05, | |
| "loss": 0.3304, | |
| "step": 14200 | |
| }, | |
| { | |
| "epoch": 35.79, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.3315562605857849, | |
| "eval_runtime": 500.788, | |
| "eval_samples_per_second": 49.61, | |
| "eval_steps_per_second": 24.805, | |
| "step": 14200 | |
| }, | |
| { | |
| "epoch": 36.04, | |
| "learning_rate": 9.259259259259257e-05, | |
| "loss": 0.3262, | |
| "step": 14300 | |
| }, | |
| { | |
| "epoch": 36.04, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.33196473121643066, | |
| "eval_runtime": 501.0344, | |
| "eval_samples_per_second": 49.585, | |
| "eval_steps_per_second": 24.793, | |
| "step": 14300 | |
| }, | |
| { | |
| "epoch": 36.29, | |
| "learning_rate": 9.09090909090909e-05, | |
| "loss": 0.3239, | |
| "step": 14400 | |
| }, | |
| { | |
| "epoch": 36.29, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.3316847085952759, | |
| "eval_runtime": 501.0629, | |
| "eval_samples_per_second": 49.583, | |
| "eval_steps_per_second": 24.791, | |
| "step": 14400 | |
| }, | |
| { | |
| "epoch": 36.55, | |
| "learning_rate": 8.92255892255892e-05, | |
| "loss": 0.3325, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 36.55, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.3308143615722656, | |
| "eval_runtime": 500.8635, | |
| "eval_samples_per_second": 49.602, | |
| "eval_steps_per_second": 24.801, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 36.8, | |
| "learning_rate": 8.754208754208753e-05, | |
| "loss": 0.325, | |
| "step": 14600 | |
| }, | |
| { | |
| "epoch": 36.8, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.3316170275211334, | |
| "eval_runtime": 500.7755, | |
| "eval_samples_per_second": 49.611, | |
| "eval_steps_per_second": 24.806, | |
| "step": 14600 | |
| }, | |
| { | |
| "epoch": 37.05, | |
| "learning_rate": 8.585858585858584e-05, | |
| "loss": 0.3416, | |
| "step": 14700 | |
| }, | |
| { | |
| "epoch": 37.05, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.3310682773590088, | |
| "eval_runtime": 501.0155, | |
| "eval_samples_per_second": 49.587, | |
| "eval_steps_per_second": 24.794, | |
| "step": 14700 | |
| }, | |
| { | |
| "epoch": 37.3, | |
| "learning_rate": 8.417508417508417e-05, | |
| "loss": 0.3226, | |
| "step": 14800 | |
| }, | |
| { | |
| "epoch": 37.3, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.33090585470199585, | |
| "eval_runtime": 500.9851, | |
| "eval_samples_per_second": 49.59, | |
| "eval_steps_per_second": 24.795, | |
| "step": 14800 | |
| }, | |
| { | |
| "epoch": 37.56, | |
| "learning_rate": 8.24915824915825e-05, | |
| "loss": 0.3286, | |
| "step": 14900 | |
| }, | |
| { | |
| "epoch": 37.56, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.3306817412376404, | |
| "eval_runtime": 500.941, | |
| "eval_samples_per_second": 49.595, | |
| "eval_steps_per_second": 24.797, | |
| "step": 14900 | |
| }, | |
| { | |
| "epoch": 37.81, | |
| "learning_rate": 8.08080808080808e-05, | |
| "loss": 0.3284, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 37.81, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.3312084972858429, | |
| "eval_runtime": 500.7822, | |
| "eval_samples_per_second": 49.61, | |
| "eval_steps_per_second": 24.805, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 38.06, | |
| "learning_rate": 7.912457912457913e-05, | |
| "loss": 0.3298, | |
| "step": 15100 | |
| }, | |
| { | |
| "epoch": 38.06, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.33263882994651794, | |
| "eval_runtime": 500.7072, | |
| "eval_samples_per_second": 49.618, | |
| "eval_steps_per_second": 24.809, | |
| "step": 15100 | |
| }, | |
| { | |
| "epoch": 38.31, | |
| "learning_rate": 7.744107744107743e-05, | |
| "loss": 0.3383, | |
| "step": 15200 | |
| }, | |
| { | |
| "epoch": 38.31, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.33113569021224976, | |
| "eval_runtime": 501.0449, | |
| "eval_samples_per_second": 49.584, | |
| "eval_steps_per_second": 24.792, | |
| "step": 15200 | |
| }, | |
| { | |
| "epoch": 38.56, | |
| "learning_rate": 7.575757575757576e-05, | |
| "loss": 0.3418, | |
| "step": 15300 | |
| }, | |
| { | |
| "epoch": 38.56, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.33080732822418213, | |
| "eval_runtime": 501.1158, | |
| "eval_samples_per_second": 49.577, | |
| "eval_steps_per_second": 24.789, | |
| "step": 15300 | |
| }, | |
| { | |
| "epoch": 38.82, | |
| "learning_rate": 7.407407407407406e-05, | |
| "loss": 0.3123, | |
| "step": 15400 | |
| }, | |
| { | |
| "epoch": 38.82, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.3311246931552887, | |
| "eval_runtime": 501.2192, | |
| "eval_samples_per_second": 49.567, | |
| "eval_steps_per_second": 24.784, | |
| "step": 15400 | |
| }, | |
| { | |
| "epoch": 39.07, | |
| "learning_rate": 7.239057239057239e-05, | |
| "loss": 0.3237, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 39.07, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.3346453011035919, | |
| "eval_runtime": 501.3886, | |
| "eval_samples_per_second": 49.55, | |
| "eval_steps_per_second": 24.775, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 39.32, | |
| "learning_rate": 7.07070707070707e-05, | |
| "loss": 0.3261, | |
| "step": 15600 | |
| }, | |
| { | |
| "epoch": 39.32, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.33250510692596436, | |
| "eval_runtime": 501.4416, | |
| "eval_samples_per_second": 49.545, | |
| "eval_steps_per_second": 24.773, | |
| "step": 15600 | |
| }, | |
| { | |
| "epoch": 39.57, | |
| "learning_rate": 6.902356902356902e-05, | |
| "loss": 0.3269, | |
| "step": 15700 | |
| }, | |
| { | |
| "epoch": 39.57, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.33122241497039795, | |
| "eval_runtime": 501.3022, | |
| "eval_samples_per_second": 49.559, | |
| "eval_steps_per_second": 24.779, | |
| "step": 15700 | |
| }, | |
| { | |
| "epoch": 39.82, | |
| "learning_rate": 6.734006734006734e-05, | |
| "loss": 0.3267, | |
| "step": 15800 | |
| }, | |
| { | |
| "epoch": 39.82, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.3319133520126343, | |
| "eval_runtime": 501.3262, | |
| "eval_samples_per_second": 49.557, | |
| "eval_steps_per_second": 24.778, | |
| "step": 15800 | |
| }, | |
| { | |
| "epoch": 40.08, | |
| "learning_rate": 6.565656565656565e-05, | |
| "loss": 0.3381, | |
| "step": 15900 | |
| }, | |
| { | |
| "epoch": 40.08, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.33269181847572327, | |
| "eval_runtime": 500.8586, | |
| "eval_samples_per_second": 49.603, | |
| "eval_steps_per_second": 24.801, | |
| "step": 15900 | |
| }, | |
| { | |
| "epoch": 40.33, | |
| "learning_rate": 6.397306397306397e-05, | |
| "loss": 0.3238, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 40.33, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.3325785994529724, | |
| "eval_runtime": 501.1224, | |
| "eval_samples_per_second": 49.577, | |
| "eval_steps_per_second": 24.788, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 40.58, | |
| "learning_rate": 6.228956228956228e-05, | |
| "loss": 0.3299, | |
| "step": 16100 | |
| }, | |
| { | |
| "epoch": 40.58, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.33203861117362976, | |
| "eval_runtime": 500.8556, | |
| "eval_samples_per_second": 49.603, | |
| "eval_steps_per_second": 24.802, | |
| "step": 16100 | |
| }, | |
| { | |
| "epoch": 40.83, | |
| "learning_rate": 6.06060606060606e-05, | |
| "loss": 0.3385, | |
| "step": 16200 | |
| }, | |
| { | |
| "epoch": 40.83, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.33086949586868286, | |
| "eval_runtime": 501.1006, | |
| "eval_samples_per_second": 49.579, | |
| "eval_steps_per_second": 24.789, | |
| "step": 16200 | |
| }, | |
| { | |
| "epoch": 41.08, | |
| "learning_rate": 5.8922558922558913e-05, | |
| "loss": 0.3268, | |
| "step": 16300 | |
| }, | |
| { | |
| "epoch": 41.08, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.33224013447761536, | |
| "eval_runtime": 500.9055, | |
| "eval_samples_per_second": 49.598, | |
| "eval_steps_per_second": 24.799, | |
| "step": 16300 | |
| }, | |
| { | |
| "epoch": 41.34, | |
| "learning_rate": 5.723905723905723e-05, | |
| "loss": 0.3253, | |
| "step": 16400 | |
| }, | |
| { | |
| "epoch": 41.34, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.3319920301437378, | |
| "eval_runtime": 501.2306, | |
| "eval_samples_per_second": 49.566, | |
| "eval_steps_per_second": 24.783, | |
| "step": 16400 | |
| }, | |
| { | |
| "epoch": 41.59, | |
| "learning_rate": 5.5555555555555545e-05, | |
| "loss": 0.3261, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 41.59, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.33143314719200134, | |
| "eval_runtime": 501.0325, | |
| "eval_samples_per_second": 49.586, | |
| "eval_steps_per_second": 24.793, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 41.84, | |
| "learning_rate": 5.387205387205386e-05, | |
| "loss": 0.3362, | |
| "step": 16600 | |
| }, | |
| { | |
| "epoch": 41.84, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.3324301838874817, | |
| "eval_runtime": 500.7987, | |
| "eval_samples_per_second": 49.609, | |
| "eval_steps_per_second": 24.804, | |
| "step": 16600 | |
| }, | |
| { | |
| "epoch": 42.09, | |
| "learning_rate": 5.218855218855219e-05, | |
| "loss": 0.3203, | |
| "step": 16700 | |
| }, | |
| { | |
| "epoch": 42.09, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.3325912356376648, | |
| "eval_runtime": 500.6821, | |
| "eval_samples_per_second": 49.62, | |
| "eval_steps_per_second": 24.81, | |
| "step": 16700 | |
| }, | |
| { | |
| "epoch": 42.34, | |
| "learning_rate": 5.0505050505050505e-05, | |
| "loss": 0.325, | |
| "step": 16800 | |
| }, | |
| { | |
| "epoch": 42.34, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.3323043882846832, | |
| "eval_runtime": 501.26, | |
| "eval_samples_per_second": 49.563, | |
| "eval_steps_per_second": 24.782, | |
| "step": 16800 | |
| }, | |
| { | |
| "epoch": 42.6, | |
| "learning_rate": 4.882154882154882e-05, | |
| "loss": 0.3172, | |
| "step": 16900 | |
| }, | |
| { | |
| "epoch": 42.6, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.33261871337890625, | |
| "eval_runtime": 500.9553, | |
| "eval_samples_per_second": 49.593, | |
| "eval_steps_per_second": 24.797, | |
| "step": 16900 | |
| }, | |
| { | |
| "epoch": 42.85, | |
| "learning_rate": 4.7138047138047136e-05, | |
| "loss": 0.3361, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 42.85, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.3307626247406006, | |
| "eval_runtime": 501.0928, | |
| "eval_samples_per_second": 49.58, | |
| "eval_steps_per_second": 24.79, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 43.1, | |
| "learning_rate": 4.545454545454545e-05, | |
| "loss": 0.3432, | |
| "step": 17100 | |
| }, | |
| { | |
| "epoch": 43.1, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.3309696614742279, | |
| "eval_runtime": 501.4895, | |
| "eval_samples_per_second": 49.54, | |
| "eval_steps_per_second": 24.77, | |
| "step": 17100 | |
| }, | |
| { | |
| "epoch": 43.35, | |
| "learning_rate": 4.377104377104377e-05, | |
| "loss": 0.3396, | |
| "step": 17200 | |
| }, | |
| { | |
| "epoch": 43.35, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.3312534689903259, | |
| "eval_runtime": 501.211, | |
| "eval_samples_per_second": 49.568, | |
| "eval_steps_per_second": 24.784, | |
| "step": 17200 | |
| }, | |
| { | |
| "epoch": 43.6, | |
| "learning_rate": 4.208754208754208e-05, | |
| "loss": 0.3163, | |
| "step": 17300 | |
| }, | |
| { | |
| "epoch": 43.6, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.33276140689849854, | |
| "eval_runtime": 501.2264, | |
| "eval_samples_per_second": 49.566, | |
| "eval_steps_per_second": 24.783, | |
| "step": 17300 | |
| }, | |
| { | |
| "epoch": 43.86, | |
| "learning_rate": 4.04040404040404e-05, | |
| "loss": 0.3353, | |
| "step": 17400 | |
| }, | |
| { | |
| "epoch": 43.86, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.3318198025226593, | |
| "eval_runtime": 501.088, | |
| "eval_samples_per_second": 49.58, | |
| "eval_steps_per_second": 24.79, | |
| "step": 17400 | |
| }, | |
| { | |
| "epoch": 44.11, | |
| "learning_rate": 3.8720538720538714e-05, | |
| "loss": 0.3299, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 44.11, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.3316582441329956, | |
| "eval_runtime": 501.3763, | |
| "eval_samples_per_second": 49.552, | |
| "eval_steps_per_second": 24.776, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 44.36, | |
| "learning_rate": 3.703703703703703e-05, | |
| "loss": 0.3213, | |
| "step": 17600 | |
| }, | |
| { | |
| "epoch": 44.36, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.33194538950920105, | |
| "eval_runtime": 501.7895, | |
| "eval_samples_per_second": 49.511, | |
| "eval_steps_per_second": 24.755, | |
| "step": 17600 | |
| }, | |
| { | |
| "epoch": 44.61, | |
| "learning_rate": 3.535353535353535e-05, | |
| "loss": 0.3253, | |
| "step": 17700 | |
| }, | |
| { | |
| "epoch": 44.61, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.33288490772247314, | |
| "eval_runtime": 501.1283, | |
| "eval_samples_per_second": 49.576, | |
| "eval_steps_per_second": 24.788, | |
| "step": 17700 | |
| }, | |
| { | |
| "epoch": 44.86, | |
| "learning_rate": 3.367003367003367e-05, | |
| "loss": 0.3391, | |
| "step": 17800 | |
| }, | |
| { | |
| "epoch": 44.86, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.33222696185112, | |
| "eval_runtime": 501.5052, | |
| "eval_samples_per_second": 49.539, | |
| "eval_steps_per_second": 24.769, | |
| "step": 17800 | |
| }, | |
| { | |
| "epoch": 45.12, | |
| "learning_rate": 3.198653198653198e-05, | |
| "loss": 0.3179, | |
| "step": 17900 | |
| }, | |
| { | |
| "epoch": 45.12, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.333011269569397, | |
| "eval_runtime": 500.7091, | |
| "eval_samples_per_second": 49.618, | |
| "eval_steps_per_second": 24.809, | |
| "step": 17900 | |
| }, | |
| { | |
| "epoch": 45.37, | |
| "learning_rate": 3.03030303030303e-05, | |
| "loss": 0.3348, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 45.37, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.3321002721786499, | |
| "eval_runtime": 501.2516, | |
| "eval_samples_per_second": 49.564, | |
| "eval_steps_per_second": 24.782, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 45.62, | |
| "learning_rate": 2.8619528619528615e-05, | |
| "loss": 0.3116, | |
| "step": 18100 | |
| }, | |
| { | |
| "epoch": 45.62, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.33259570598602295, | |
| "eval_runtime": 501.2743, | |
| "eval_samples_per_second": 49.562, | |
| "eval_steps_per_second": 24.781, | |
| "step": 18100 | |
| }, | |
| { | |
| "epoch": 45.87, | |
| "learning_rate": 2.693602693602693e-05, | |
| "loss": 0.3334, | |
| "step": 18200 | |
| }, | |
| { | |
| "epoch": 45.87, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.33218705654144287, | |
| "eval_runtime": 501.0248, | |
| "eval_samples_per_second": 49.586, | |
| "eval_steps_per_second": 24.793, | |
| "step": 18200 | |
| }, | |
| { | |
| "epoch": 46.12, | |
| "learning_rate": 2.5252525252525253e-05, | |
| "loss": 0.3401, | |
| "step": 18300 | |
| }, | |
| { | |
| "epoch": 46.12, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.3314586579799652, | |
| "eval_runtime": 501.1615, | |
| "eval_samples_per_second": 49.573, | |
| "eval_steps_per_second": 24.786, | |
| "step": 18300 | |
| }, | |
| { | |
| "epoch": 46.38, | |
| "learning_rate": 2.3569023569023568e-05, | |
| "loss": 0.3381, | |
| "step": 18400 | |
| }, | |
| { | |
| "epoch": 46.38, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.33111417293548584, | |
| "eval_runtime": 501.2779, | |
| "eval_samples_per_second": 49.561, | |
| "eval_steps_per_second": 24.781, | |
| "step": 18400 | |
| }, | |
| { | |
| "epoch": 46.63, | |
| "learning_rate": 2.1885521885521884e-05, | |
| "loss": 0.3154, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 46.63, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.3327486515045166, | |
| "eval_runtime": 500.9424, | |
| "eval_samples_per_second": 49.595, | |
| "eval_steps_per_second": 24.797, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 46.88, | |
| "learning_rate": 2.02020202020202e-05, | |
| "loss": 0.3348, | |
| "step": 18600 | |
| }, | |
| { | |
| "epoch": 46.88, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.33224716782569885, | |
| "eval_runtime": 500.9745, | |
| "eval_samples_per_second": 49.591, | |
| "eval_steps_per_second": 24.796, | |
| "step": 18600 | |
| }, | |
| { | |
| "epoch": 47.13, | |
| "learning_rate": 1.8518518518518515e-05, | |
| "loss": 0.3285, | |
| "step": 18700 | |
| }, | |
| { | |
| "epoch": 47.13, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.3325360119342804, | |
| "eval_runtime": 501.157, | |
| "eval_samples_per_second": 49.573, | |
| "eval_steps_per_second": 24.787, | |
| "step": 18700 | |
| }, | |
| { | |
| "epoch": 47.39, | |
| "learning_rate": 1.6835016835016834e-05, | |
| "loss": 0.3256, | |
| "step": 18800 | |
| }, | |
| { | |
| "epoch": 47.39, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.3328581750392914, | |
| "eval_runtime": 501.2027, | |
| "eval_samples_per_second": 49.569, | |
| "eval_steps_per_second": 24.784, | |
| "step": 18800 | |
| }, | |
| { | |
| "epoch": 47.64, | |
| "learning_rate": 1.515151515151515e-05, | |
| "loss": 0.3389, | |
| "step": 18900 | |
| }, | |
| { | |
| "epoch": 47.64, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.3324856758117676, | |
| "eval_runtime": 501.1837, | |
| "eval_samples_per_second": 49.571, | |
| "eval_steps_per_second": 24.785, | |
| "step": 18900 | |
| }, | |
| { | |
| "epoch": 47.89, | |
| "learning_rate": 1.3468013468013465e-05, | |
| "loss": 0.3288, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 47.89, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.3326534032821655, | |
| "eval_runtime": 501.4739, | |
| "eval_samples_per_second": 49.542, | |
| "eval_steps_per_second": 24.771, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 48.14, | |
| "learning_rate": 1.1784511784511784e-05, | |
| "loss": 0.3172, | |
| "step": 19100 | |
| }, | |
| { | |
| "epoch": 48.14, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.3326767683029175, | |
| "eval_runtime": 500.8014, | |
| "eval_samples_per_second": 49.608, | |
| "eval_steps_per_second": 24.804, | |
| "step": 19100 | |
| }, | |
| { | |
| "epoch": 48.39, | |
| "learning_rate": 1.01010101010101e-05, | |
| "loss": 0.3211, | |
| "step": 19200 | |
| }, | |
| { | |
| "epoch": 48.39, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.3324893116950989, | |
| "eval_runtime": 500.8057, | |
| "eval_samples_per_second": 49.608, | |
| "eval_steps_per_second": 24.804, | |
| "step": 19200 | |
| }, | |
| { | |
| "epoch": 48.65, | |
| "learning_rate": 8.417508417508417e-06, | |
| "loss": 0.3348, | |
| "step": 19300 | |
| }, | |
| { | |
| "epoch": 48.65, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.33247503638267517, | |
| "eval_runtime": 501.0569, | |
| "eval_samples_per_second": 49.583, | |
| "eval_steps_per_second": 24.792, | |
| "step": 19300 | |
| }, | |
| { | |
| "epoch": 48.9, | |
| "learning_rate": 6.7340067340067325e-06, | |
| "loss": 0.3327, | |
| "step": 19400 | |
| }, | |
| { | |
| "epoch": 48.9, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.33261463046073914, | |
| "eval_runtime": 501.0903, | |
| "eval_samples_per_second": 49.58, | |
| "eval_steps_per_second": 24.79, | |
| "step": 19400 | |
| }, | |
| { | |
| "epoch": 49.15, | |
| "learning_rate": 5.05050505050505e-06, | |
| "loss": 0.3341, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 49.15, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.33255448937416077, | |
| "eval_runtime": 500.9292, | |
| "eval_samples_per_second": 49.596, | |
| "eval_steps_per_second": 24.798, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 49.4, | |
| "learning_rate": 3.3670033670033663e-06, | |
| "loss": 0.3344, | |
| "step": 19600 | |
| }, | |
| { | |
| "epoch": 49.4, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.33247339725494385, | |
| "eval_runtime": 501.4588, | |
| "eval_samples_per_second": 49.543, | |
| "eval_steps_per_second": 24.772, | |
| "step": 19600 | |
| }, | |
| { | |
| "epoch": 49.65, | |
| "learning_rate": 1.6835016835016831e-06, | |
| "loss": 0.3207, | |
| "step": 19700 | |
| }, | |
| { | |
| "epoch": 49.65, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.33257797360420227, | |
| "eval_runtime": 501.4807, | |
| "eval_samples_per_second": 49.541, | |
| "eval_steps_per_second": 24.771, | |
| "step": 19700 | |
| }, | |
| { | |
| "epoch": 49.91, | |
| "learning_rate": 0.0, | |
| "loss": 0.3299, | |
| "step": 19800 | |
| }, | |
| { | |
| "epoch": 49.91, | |
| "eval_accuracy": 0.8974400257607471, | |
| "eval_loss": 0.33260539174079895, | |
| "eval_runtime": 501.1869, | |
| "eval_samples_per_second": 49.57, | |
| "eval_steps_per_second": 24.785, | |
| "step": 19800 | |
| }, | |
| { | |
| "epoch": 49.91, | |
| "step": 19800, | |
| "total_flos": 7.53101543607702e+19, | |
| "train_loss": 0.3200095210412536, | |
| "train_runtime": 116243.9445, | |
| "train_samples_per_second": 10.917, | |
| "train_steps_per_second": 0.17 | |
| } | |
| ], | |
| "logging_steps": 100, | |
| "max_steps": 19800, | |
| "num_train_epochs": 50, | |
| "save_steps": 100, | |
| "total_flos": 7.53101543607702e+19, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |