| { | |
| "best_metric": 0.09604515880346298, | |
| "best_model_checkpoint": "./timit-english-v2/checkpoint-6500", | |
| "epoch": 100.0, | |
| "global_step": 6500, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 2.9999999999999997e-05, | |
| "loss": 2.2189, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 3.08, | |
| "learning_rate": 5.9999999999999995e-05, | |
| "loss": 2.1219, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 4.62, | |
| "learning_rate": 8.999999999999999e-05, | |
| "loss": 1.4984, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 6.15, | |
| "learning_rate": 0.00011999999999999999, | |
| "loss": 0.6169, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 7.69, | |
| "learning_rate": 0.00015, | |
| "loss": 0.3111, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 7.69, | |
| "eval_cer": 0.12291518457038969, | |
| "eval_loss": 0.2245599329471588, | |
| "eval_runtime": 23.772, | |
| "eval_samples_per_second": 19.435, | |
| "eval_steps_per_second": 1.22, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 9.23, | |
| "learning_rate": 0.00017999999999999998, | |
| "loss": 0.2147, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 10.77, | |
| "learning_rate": 0.00020999999999999998, | |
| "loss": 0.1641, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 12.31, | |
| "learning_rate": 0.00023999999999999998, | |
| "loss": 0.1424, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 13.85, | |
| "learning_rate": 0.00027, | |
| "loss": 0.1333, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 15.38, | |
| "learning_rate": 0.0003, | |
| "loss": 0.1211, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 15.38, | |
| "eval_cer": 0.07975654098490238, | |
| "eval_loss": 0.14135180413722992, | |
| "eval_runtime": 24.275, | |
| "eval_samples_per_second": 19.032, | |
| "eval_steps_per_second": 1.195, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 16.92, | |
| "learning_rate": 0.0002945454545454545, | |
| "loss": 0.1159, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 18.46, | |
| "learning_rate": 0.00028909090909090904, | |
| "loss": 0.0959, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "learning_rate": 0.0002836363636363636, | |
| "loss": 0.0881, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 21.54, | |
| "learning_rate": 0.00027818181818181815, | |
| "loss": 0.0768, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 23.08, | |
| "learning_rate": 0.0002727272727272727, | |
| "loss": 0.07, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 23.08, | |
| "eval_cer": 0.0627618370089321, | |
| "eval_loss": 0.11634844541549683, | |
| "eval_runtime": 24.2853, | |
| "eval_samples_per_second": 19.024, | |
| "eval_steps_per_second": 1.194, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 24.62, | |
| "learning_rate": 0.0002672727272727272, | |
| "loss": 0.0613, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 26.15, | |
| "learning_rate": 0.0002618181818181818, | |
| "loss": 0.0558, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 27.69, | |
| "learning_rate": 0.00025636363636363633, | |
| "loss": 0.0556, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 29.23, | |
| "learning_rate": 0.00025090909090909086, | |
| "loss": 0.0491, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 30.77, | |
| "learning_rate": 0.00024545454545454545, | |
| "loss": 0.0516, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 30.77, | |
| "eval_cer": 0.07082444075567149, | |
| "eval_loss": 0.12194966524839401, | |
| "eval_runtime": 32.2119, | |
| "eval_samples_per_second": 14.343, | |
| "eval_steps_per_second": 0.9, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 32.31, | |
| "learning_rate": 0.00023999999999999998, | |
| "loss": 0.0449, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 33.85, | |
| "learning_rate": 0.00023454545454545454, | |
| "loss": 0.0365, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 35.38, | |
| "learning_rate": 0.00022909090909090907, | |
| "loss": 0.0362, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 36.92, | |
| "learning_rate": 0.00022363636363636363, | |
| "loss": 0.0374, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 38.46, | |
| "learning_rate": 0.00021818181818181816, | |
| "loss": 0.0338, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 38.46, | |
| "eval_cer": 0.06363133349142361, | |
| "eval_loss": 0.10956428200006485, | |
| "eval_runtime": 32.1273, | |
| "eval_samples_per_second": 14.38, | |
| "eval_steps_per_second": 0.903, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 40.0, | |
| "learning_rate": 0.00021272727272727272, | |
| "loss": 0.0315, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 41.54, | |
| "learning_rate": 0.00020727272727272725, | |
| "loss": 0.0318, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 43.08, | |
| "learning_rate": 0.0002018181818181818, | |
| "loss": 0.0259, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 44.62, | |
| "learning_rate": 0.00019636363636363634, | |
| "loss": 0.0264, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 46.15, | |
| "learning_rate": 0.0001909090909090909, | |
| "loss": 0.0256, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 46.15, | |
| "eval_cer": 0.05438305272310489, | |
| "eval_loss": 0.11163550615310669, | |
| "eval_runtime": 32.5342, | |
| "eval_samples_per_second": 14.2, | |
| "eval_steps_per_second": 0.891, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 47.69, | |
| "learning_rate": 0.00018545454545454543, | |
| "loss": 0.0255, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 49.23, | |
| "learning_rate": 0.00017999999999999998, | |
| "loss": 0.0221, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 50.77, | |
| "learning_rate": 0.00017454545454545452, | |
| "loss": 0.0242, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 52.31, | |
| "learning_rate": 0.00016909090909090907, | |
| "loss": 0.0196, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 53.85, | |
| "learning_rate": 0.0001636363636363636, | |
| "loss": 0.0226, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 53.85, | |
| "eval_cer": 0.047664216267488735, | |
| "eval_loss": 0.10824603587388992, | |
| "eval_runtime": 32.9539, | |
| "eval_samples_per_second": 14.02, | |
| "eval_steps_per_second": 0.88, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 55.38, | |
| "learning_rate": 0.00015818181818181816, | |
| "loss": 0.0185, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 56.92, | |
| "learning_rate": 0.0001527272727272727, | |
| "loss": 0.0173, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 58.46, | |
| "learning_rate": 0.00014727272727272725, | |
| "loss": 0.0141, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 60.0, | |
| "learning_rate": 0.0001418181818181818, | |
| "loss": 0.0168, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 61.54, | |
| "learning_rate": 0.00013636363636363634, | |
| "loss": 0.016, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 61.54, | |
| "eval_cer": 0.052644059758121885, | |
| "eval_loss": 0.1161409541964531, | |
| "eval_runtime": 32.1, | |
| "eval_samples_per_second": 14.393, | |
| "eval_steps_per_second": 0.903, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 63.08, | |
| "learning_rate": 0.0001309090909090909, | |
| "loss": 0.0154, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 64.62, | |
| "learning_rate": 0.00012545454545454543, | |
| "loss": 0.0138, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 66.15, | |
| "learning_rate": 0.00011999999999999999, | |
| "loss": 0.0123, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 67.69, | |
| "learning_rate": 0.00011454545454545453, | |
| "loss": 0.011, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 69.23, | |
| "learning_rate": 0.00010909090909090908, | |
| "loss": 0.0123, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 69.23, | |
| "eval_cer": 0.05873053513556241, | |
| "eval_loss": 0.11413775384426117, | |
| "eval_runtime": 37.8046, | |
| "eval_samples_per_second": 12.221, | |
| "eval_steps_per_second": 0.767, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 70.77, | |
| "learning_rate": 0.00010363636363636362, | |
| "loss": 0.011, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 72.31, | |
| "learning_rate": 9.818181818181817e-05, | |
| "loss": 0.0107, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 73.85, | |
| "learning_rate": 9.272727272727271e-05, | |
| "loss": 0.009, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 75.38, | |
| "learning_rate": 8.727272727272726e-05, | |
| "loss": 0.0092, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 76.92, | |
| "learning_rate": 8.18181818181818e-05, | |
| "loss": 0.008, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 76.92, | |
| "eval_cer": 0.0479013516718046, | |
| "eval_loss": 0.09953264147043228, | |
| "eval_runtime": 32.7656, | |
| "eval_samples_per_second": 14.1, | |
| "eval_steps_per_second": 0.885, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 78.46, | |
| "learning_rate": 7.636363636363635e-05, | |
| "loss": 0.0091, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 80.0, | |
| "learning_rate": 7.09090909090909e-05, | |
| "loss": 0.0068, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 81.54, | |
| "learning_rate": 6.545454545454545e-05, | |
| "loss": 0.007, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 83.08, | |
| "learning_rate": 5.9999999999999995e-05, | |
| "loss": 0.0071, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 84.62, | |
| "learning_rate": 5.454545454545454e-05, | |
| "loss": 0.0065, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 84.62, | |
| "eval_cer": 0.05130029246699866, | |
| "eval_loss": 0.10165167599916458, | |
| "eval_runtime": 29.0978, | |
| "eval_samples_per_second": 15.878, | |
| "eval_steps_per_second": 0.997, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 86.15, | |
| "learning_rate": 4.9090909090909084e-05, | |
| "loss": 0.0056, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 87.69, | |
| "learning_rate": 4.363636363636363e-05, | |
| "loss": 0.0058, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 89.23, | |
| "learning_rate": 3.8181818181818174e-05, | |
| "loss": 0.0047, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 90.77, | |
| "learning_rate": 3.2727272727272725e-05, | |
| "loss": 0.0055, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 92.31, | |
| "learning_rate": 2.727272727272727e-05, | |
| "loss": 0.0041, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 92.31, | |
| "eval_cer": 0.0445814560113825, | |
| "eval_loss": 0.10709430277347565, | |
| "eval_runtime": 35.6854, | |
| "eval_samples_per_second": 12.946, | |
| "eval_steps_per_second": 0.813, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 93.85, | |
| "learning_rate": 2.1818181818181814e-05, | |
| "loss": 0.0041, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 95.38, | |
| "learning_rate": 1.6363636363636363e-05, | |
| "loss": 0.0043, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 96.92, | |
| "learning_rate": 1.0909090909090907e-05, | |
| "loss": 0.0047, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 98.46, | |
| "learning_rate": 5.454545454545454e-06, | |
| "loss": 0.004, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 100.0, | |
| "learning_rate": 0.0, | |
| "loss": 0.0033, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 100.0, | |
| "eval_cer": 0.04687376491976919, | |
| "eval_loss": 0.09604515880346298, | |
| "eval_runtime": 33.0546, | |
| "eval_samples_per_second": 13.977, | |
| "eval_steps_per_second": 0.877, | |
| "step": 6500 | |
| } | |
| ], | |
| "max_steps": 6500, | |
| "num_train_epochs": 100, | |
| "total_flos": 4.026656586456467e+19, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |