{ "best_metric": 0.09604515880346298, "best_model_checkpoint": "./timit-english-v2/checkpoint-6500", "epoch": 100.0, "global_step": 6500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.54, "learning_rate": 2.9999999999999997e-05, "loss": 2.2189, "step": 100 }, { "epoch": 3.08, "learning_rate": 5.9999999999999995e-05, "loss": 2.1219, "step": 200 }, { "epoch": 4.62, "learning_rate": 8.999999999999999e-05, "loss": 1.4984, "step": 300 }, { "epoch": 6.15, "learning_rate": 0.00011999999999999999, "loss": 0.6169, "step": 400 }, { "epoch": 7.69, "learning_rate": 0.00015, "loss": 0.3111, "step": 500 }, { "epoch": 7.69, "eval_cer": 0.12291518457038969, "eval_loss": 0.2245599329471588, "eval_runtime": 23.772, "eval_samples_per_second": 19.435, "eval_steps_per_second": 1.22, "step": 500 }, { "epoch": 9.23, "learning_rate": 0.00017999999999999998, "loss": 0.2147, "step": 600 }, { "epoch": 10.77, "learning_rate": 0.00020999999999999998, "loss": 0.1641, "step": 700 }, { "epoch": 12.31, "learning_rate": 0.00023999999999999998, "loss": 0.1424, "step": 800 }, { "epoch": 13.85, "learning_rate": 0.00027, "loss": 0.1333, "step": 900 }, { "epoch": 15.38, "learning_rate": 0.0003, "loss": 0.1211, "step": 1000 }, { "epoch": 15.38, "eval_cer": 0.07975654098490238, "eval_loss": 0.14135180413722992, "eval_runtime": 24.275, "eval_samples_per_second": 19.032, "eval_steps_per_second": 1.195, "step": 1000 }, { "epoch": 16.92, "learning_rate": 0.0002945454545454545, "loss": 0.1159, "step": 1100 }, { "epoch": 18.46, "learning_rate": 0.00028909090909090904, "loss": 0.0959, "step": 1200 }, { "epoch": 20.0, "learning_rate": 0.0002836363636363636, "loss": 0.0881, "step": 1300 }, { "epoch": 21.54, "learning_rate": 0.00027818181818181815, "loss": 0.0768, "step": 1400 }, { "epoch": 23.08, "learning_rate": 0.0002727272727272727, "loss": 0.07, "step": 1500 }, { "epoch": 23.08, "eval_cer": 0.0627618370089321, "eval_loss": 0.11634844541549683, "eval_runtime": 24.2853, "eval_samples_per_second": 19.024, "eval_steps_per_second": 1.194, "step": 1500 }, { "epoch": 24.62, "learning_rate": 0.0002672727272727272, "loss": 0.0613, "step": 1600 }, { "epoch": 26.15, "learning_rate": 0.0002618181818181818, "loss": 0.0558, "step": 1700 }, { "epoch": 27.69, "learning_rate": 0.00025636363636363633, "loss": 0.0556, "step": 1800 }, { "epoch": 29.23, "learning_rate": 0.00025090909090909086, "loss": 0.0491, "step": 1900 }, { "epoch": 30.77, "learning_rate": 0.00024545454545454545, "loss": 0.0516, "step": 2000 }, { "epoch": 30.77, "eval_cer": 0.07082444075567149, "eval_loss": 0.12194966524839401, "eval_runtime": 32.2119, "eval_samples_per_second": 14.343, "eval_steps_per_second": 0.9, "step": 2000 }, { "epoch": 32.31, "learning_rate": 0.00023999999999999998, "loss": 0.0449, "step": 2100 }, { "epoch": 33.85, "learning_rate": 0.00023454545454545454, "loss": 0.0365, "step": 2200 }, { "epoch": 35.38, "learning_rate": 0.00022909090909090907, "loss": 0.0362, "step": 2300 }, { "epoch": 36.92, "learning_rate": 0.00022363636363636363, "loss": 0.0374, "step": 2400 }, { "epoch": 38.46, "learning_rate": 0.00021818181818181816, "loss": 0.0338, "step": 2500 }, { "epoch": 38.46, "eval_cer": 0.06363133349142361, "eval_loss": 0.10956428200006485, "eval_runtime": 32.1273, "eval_samples_per_second": 14.38, "eval_steps_per_second": 0.903, "step": 2500 }, { "epoch": 40.0, "learning_rate": 0.00021272727272727272, "loss": 0.0315, "step": 2600 }, { "epoch": 41.54, "learning_rate": 0.00020727272727272725, "loss": 0.0318, "step": 2700 }, { "epoch": 43.08, "learning_rate": 0.0002018181818181818, "loss": 0.0259, "step": 2800 }, { "epoch": 44.62, "learning_rate": 0.00019636363636363634, "loss": 0.0264, "step": 2900 }, { "epoch": 46.15, "learning_rate": 0.0001909090909090909, "loss": 0.0256, "step": 3000 }, { "epoch": 46.15, "eval_cer": 0.05438305272310489, "eval_loss": 0.11163550615310669, "eval_runtime": 32.5342, "eval_samples_per_second": 14.2, "eval_steps_per_second": 0.891, "step": 3000 }, { "epoch": 47.69, "learning_rate": 0.00018545454545454543, "loss": 0.0255, "step": 3100 }, { "epoch": 49.23, "learning_rate": 0.00017999999999999998, "loss": 0.0221, "step": 3200 }, { "epoch": 50.77, "learning_rate": 0.00017454545454545452, "loss": 0.0242, "step": 3300 }, { "epoch": 52.31, "learning_rate": 0.00016909090909090907, "loss": 0.0196, "step": 3400 }, { "epoch": 53.85, "learning_rate": 0.0001636363636363636, "loss": 0.0226, "step": 3500 }, { "epoch": 53.85, "eval_cer": 0.047664216267488735, "eval_loss": 0.10824603587388992, "eval_runtime": 32.9539, "eval_samples_per_second": 14.02, "eval_steps_per_second": 0.88, "step": 3500 }, { "epoch": 55.38, "learning_rate": 0.00015818181818181816, "loss": 0.0185, "step": 3600 }, { "epoch": 56.92, "learning_rate": 0.0001527272727272727, "loss": 0.0173, "step": 3700 }, { "epoch": 58.46, "learning_rate": 0.00014727272727272725, "loss": 0.0141, "step": 3800 }, { "epoch": 60.0, "learning_rate": 0.0001418181818181818, "loss": 0.0168, "step": 3900 }, { "epoch": 61.54, "learning_rate": 0.00013636363636363634, "loss": 0.016, "step": 4000 }, { "epoch": 61.54, "eval_cer": 0.052644059758121885, "eval_loss": 0.1161409541964531, "eval_runtime": 32.1, "eval_samples_per_second": 14.393, "eval_steps_per_second": 0.903, "step": 4000 }, { "epoch": 63.08, "learning_rate": 0.0001309090909090909, "loss": 0.0154, "step": 4100 }, { "epoch": 64.62, "learning_rate": 0.00012545454545454543, "loss": 0.0138, "step": 4200 }, { "epoch": 66.15, "learning_rate": 0.00011999999999999999, "loss": 0.0123, "step": 4300 }, { "epoch": 67.69, "learning_rate": 0.00011454545454545453, "loss": 0.011, "step": 4400 }, { "epoch": 69.23, "learning_rate": 0.00010909090909090908, "loss": 0.0123, "step": 4500 }, { "epoch": 69.23, "eval_cer": 0.05873053513556241, "eval_loss": 0.11413775384426117, "eval_runtime": 37.8046, "eval_samples_per_second": 12.221, "eval_steps_per_second": 0.767, "step": 4500 }, { "epoch": 70.77, "learning_rate": 0.00010363636363636362, "loss": 0.011, "step": 4600 }, { "epoch": 72.31, "learning_rate": 9.818181818181817e-05, "loss": 0.0107, "step": 4700 }, { "epoch": 73.85, "learning_rate": 9.272727272727271e-05, "loss": 0.009, "step": 4800 }, { "epoch": 75.38, "learning_rate": 8.727272727272726e-05, "loss": 0.0092, "step": 4900 }, { "epoch": 76.92, "learning_rate": 8.18181818181818e-05, "loss": 0.008, "step": 5000 }, { "epoch": 76.92, "eval_cer": 0.0479013516718046, "eval_loss": 0.09953264147043228, "eval_runtime": 32.7656, "eval_samples_per_second": 14.1, "eval_steps_per_second": 0.885, "step": 5000 }, { "epoch": 78.46, "learning_rate": 7.636363636363635e-05, "loss": 0.0091, "step": 5100 }, { "epoch": 80.0, "learning_rate": 7.09090909090909e-05, "loss": 0.0068, "step": 5200 }, { "epoch": 81.54, "learning_rate": 6.545454545454545e-05, "loss": 0.007, "step": 5300 }, { "epoch": 83.08, "learning_rate": 5.9999999999999995e-05, "loss": 0.0071, "step": 5400 }, { "epoch": 84.62, "learning_rate": 5.454545454545454e-05, "loss": 0.0065, "step": 5500 }, { "epoch": 84.62, "eval_cer": 0.05130029246699866, "eval_loss": 0.10165167599916458, "eval_runtime": 29.0978, "eval_samples_per_second": 15.878, "eval_steps_per_second": 0.997, "step": 5500 }, { "epoch": 86.15, "learning_rate": 4.9090909090909084e-05, "loss": 0.0056, "step": 5600 }, { "epoch": 87.69, "learning_rate": 4.363636363636363e-05, "loss": 0.0058, "step": 5700 }, { "epoch": 89.23, "learning_rate": 3.8181818181818174e-05, "loss": 0.0047, "step": 5800 }, { "epoch": 90.77, "learning_rate": 3.2727272727272725e-05, "loss": 0.0055, "step": 5900 }, { "epoch": 92.31, "learning_rate": 2.727272727272727e-05, "loss": 0.0041, "step": 6000 }, { "epoch": 92.31, "eval_cer": 0.0445814560113825, "eval_loss": 0.10709430277347565, "eval_runtime": 35.6854, "eval_samples_per_second": 12.946, "eval_steps_per_second": 0.813, "step": 6000 }, { "epoch": 93.85, "learning_rate": 2.1818181818181814e-05, "loss": 0.0041, "step": 6100 }, { "epoch": 95.38, "learning_rate": 1.6363636363636363e-05, "loss": 0.0043, "step": 6200 }, { "epoch": 96.92, "learning_rate": 1.0909090909090907e-05, "loss": 0.0047, "step": 6300 }, { "epoch": 98.46, "learning_rate": 5.454545454545454e-06, "loss": 0.004, "step": 6400 }, { "epoch": 100.0, "learning_rate": 0.0, "loss": 0.0033, "step": 6500 }, { "epoch": 100.0, "eval_cer": 0.04687376491976919, "eval_loss": 0.09604515880346298, "eval_runtime": 33.0546, "eval_samples_per_second": 13.977, "eval_steps_per_second": 0.877, "step": 6500 } ], "max_steps": 6500, "num_train_epochs": 100, "total_flos": 4.026656586456467e+19, "trial_name": null, "trial_params": null }