{ "best_metric": null, "best_model_checkpoint": null, "epoch": 5.0, "eval_steps": 1000.0, "global_step": 5495, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.09099181073703366, "grad_norm": 1.2852333784103394, "learning_rate": 3.080335788799027e-05, "loss": 0.1592, "step": 100 }, { "epoch": 0.18198362147406733, "grad_norm": 1.0739330053329468, "learning_rate": 4.1080243101273595e-05, "loss": 0.1603, "step": 200 }, { "epoch": 0.272975432211101, "grad_norm": 0.9484991431236267, "learning_rate": 4.030441225894548e-05, "loss": 0.1797, "step": 300 }, { "epoch": 0.36396724294813465, "grad_norm": 1.8888756036758423, "learning_rate": 3.9528581416617364e-05, "loss": 0.1754, "step": 400 }, { "epoch": 0.4549590536851683, "grad_norm": 1.1603392362594604, "learning_rate": 3.8752750574289256e-05, "loss": 0.1818, "step": 500 }, { "epoch": 0.545950864422202, "grad_norm": 1.3092777729034424, "learning_rate": 3.797691973196114e-05, "loss": 0.1805, "step": 600 }, { "epoch": 0.6369426751592356, "grad_norm": 1.2778502702713013, "learning_rate": 3.7201088889633025e-05, "loss": 0.1642, "step": 700 }, { "epoch": 0.7279344858962693, "grad_norm": 3.04357647895813, "learning_rate": 3.642525804730491e-05, "loss": 0.1626, "step": 800 }, { "epoch": 0.818926296633303, "grad_norm": 0.8114387392997742, "learning_rate": 3.56494272049768e-05, "loss": 0.1625, "step": 900 }, { "epoch": 0.9099181073703366, "grad_norm": 1.6360591650009155, "learning_rate": 3.4873596362648686e-05, "loss": 0.1644, "step": 1000 }, { "epoch": 0.9099181073703366, "eval_cer": 0.17117944011014227, "eval_loss": 0.24234530329704285, "eval_runtime": 16.3456, "eval_samples_per_second": 30.589, "eval_steps_per_second": 0.979, "eval_wer": 0.36909142371921877, "step": 1000 }, { "epoch": 1.0009099181073704, "grad_norm": 1.0669325590133667, "learning_rate": 3.409776552032057e-05, "loss": 0.1556, "step": 1100 }, { "epoch": 1.091901728844404, "grad_norm": 0.9532307386398315, "learning_rate": 3.332193467799246e-05, "loss": 0.1368, "step": 1200 }, { "epoch": 1.1828935395814377, "grad_norm": 2.0215086936950684, "learning_rate": 3.254610383566435e-05, "loss": 0.1458, "step": 1300 }, { "epoch": 1.2738853503184713, "grad_norm": 1.452370285987854, "learning_rate": 3.177027299333623e-05, "loss": 0.1433, "step": 1400 }, { "epoch": 1.364877161055505, "grad_norm": 0.7807812690734863, "learning_rate": 3.099444215100812e-05, "loss": 0.1433, "step": 1500 }, { "epoch": 1.4558689717925386, "grad_norm": 0.5693336129188538, "learning_rate": 3.0218611308680008e-05, "loss": 0.1372, "step": 1600 }, { "epoch": 1.5468607825295724, "grad_norm": 1.0194613933563232, "learning_rate": 2.9442780466351896e-05, "loss": 0.1429, "step": 1700 }, { "epoch": 1.6378525932666061, "grad_norm": 1.2318494319915771, "learning_rate": 2.866694962402378e-05, "loss": 0.1403, "step": 1800 }, { "epoch": 1.7288444040036397, "grad_norm": 0.6520742774009705, "learning_rate": 2.789111878169567e-05, "loss": 0.1442, "step": 1900 }, { "epoch": 1.8198362147406733, "grad_norm": 0.8180395364761353, "learning_rate": 2.7115287939367557e-05, "loss": 0.1373, "step": 2000 }, { "epoch": 1.8198362147406733, "eval_cer": 0.1699278234386082, "eval_loss": 0.23588787019252777, "eval_runtime": 16.2786, "eval_samples_per_second": 30.715, "eval_steps_per_second": 0.983, "eval_wer": 0.3586187376167563, "step": 2000 }, { "epoch": 1.910828025477707, "grad_norm": 0.929227352142334, "learning_rate": 2.6339457097039442e-05, "loss": 0.1407, "step": 2100 }, { "epoch": 2.001819836214741, "grad_norm": 1.137143850326538, "learning_rate": 2.556362625471133e-05, "loss": 0.1363, "step": 2200 }, { "epoch": 2.092811646951774, "grad_norm": 1.959381341934204, "learning_rate": 2.4787795412383218e-05, "loss": 0.1274, "step": 2300 }, { "epoch": 2.183803457688808, "grad_norm": 0.8095592856407166, "learning_rate": 2.4011964570055103e-05, "loss": 0.1288, "step": 2400 }, { "epoch": 2.2747952684258417, "grad_norm": 0.717786431312561, "learning_rate": 2.323613372772699e-05, "loss": 0.1314, "step": 2500 }, { "epoch": 2.3657870791628755, "grad_norm": 1.3463408946990967, "learning_rate": 2.246030288539888e-05, "loss": 0.1243, "step": 2600 }, { "epoch": 2.4567788898999092, "grad_norm": 1.703278660774231, "learning_rate": 2.1684472043070764e-05, "loss": 0.1226, "step": 2700 }, { "epoch": 2.5477707006369426, "grad_norm": 1.0250530242919922, "learning_rate": 2.0908641200742652e-05, "loss": 0.1217, "step": 2800 }, { "epoch": 2.6387625113739763, "grad_norm": 2.0595955848693848, "learning_rate": 2.0132810358414537e-05, "loss": 0.1162, "step": 2900 }, { "epoch": 2.72975432211101, "grad_norm": 0.5484445691108704, "learning_rate": 1.9356979516086425e-05, "loss": 0.1219, "step": 3000 }, { "epoch": 2.72975432211101, "eval_cer": 0.16654845842546623, "eval_loss": 0.2350655198097229, "eval_runtime": 16.364, "eval_samples_per_second": 30.555, "eval_steps_per_second": 0.978, "eval_wer": 0.34871214265496747, "step": 3000 }, { "epoch": 2.8207461328480434, "grad_norm": 0.8254183530807495, "learning_rate": 1.858114867375831e-05, "loss": 0.1302, "step": 3100 }, { "epoch": 2.911737943585077, "grad_norm": 1.559329867362976, "learning_rate": 1.7805317831430197e-05, "loss": 0.1224, "step": 3200 }, { "epoch": 3.002729754322111, "grad_norm": 0.9372493624687195, "learning_rate": 1.7029486989102082e-05, "loss": 0.1186, "step": 3300 }, { "epoch": 3.0937215650591448, "grad_norm": 0.5558441877365112, "learning_rate": 1.625365614677397e-05, "loss": 0.1084, "step": 3400 }, { "epoch": 3.1847133757961785, "grad_norm": 0.9594938158988953, "learning_rate": 1.547782530444586e-05, "loss": 0.114, "step": 3500 }, { "epoch": 3.275705186533212, "grad_norm": 0.835418164730072, "learning_rate": 1.4701994462117745e-05, "loss": 0.1099, "step": 3600 }, { "epoch": 3.3666969972702456, "grad_norm": 1.4973269701004028, "learning_rate": 1.3926163619789631e-05, "loss": 0.1111, "step": 3700 }, { "epoch": 3.4576888080072794, "grad_norm": 0.5722721219062805, "learning_rate": 1.3150332777461518e-05, "loss": 0.1117, "step": 3800 }, { "epoch": 3.548680618744313, "grad_norm": 1.0630409717559814, "learning_rate": 1.2374501935133406e-05, "loss": 0.1171, "step": 3900 }, { "epoch": 3.6396724294813465, "grad_norm": 4.837808609008789, "learning_rate": 1.1598671092805292e-05, "loss": 0.1106, "step": 4000 }, { "epoch": 3.6396724294813465, "eval_cer": 0.16671534064833743, "eval_loss": 0.23994748294353485, "eval_runtime": 16.3865, "eval_samples_per_second": 30.513, "eval_steps_per_second": 0.976, "eval_wer": 0.3447495046702519, "step": 4000 }, { "epoch": 3.7306642402183803, "grad_norm": 1.5471532344818115, "learning_rate": 1.0822840250477179e-05, "loss": 0.1061, "step": 4100 }, { "epoch": 3.821656050955414, "grad_norm": 0.7752039432525635, "learning_rate": 1.0047009408149065e-05, "loss": 0.1032, "step": 4200 }, { "epoch": 3.912647861692448, "grad_norm": 1.4473958015441895, "learning_rate": 9.271178565820951e-06, "loss": 0.1117, "step": 4300 }, { "epoch": 4.003639672429482, "grad_norm": 1.3828502893447876, "learning_rate": 8.495347723492838e-06, "loss": 0.1089, "step": 4400 }, { "epoch": 4.094631483166515, "grad_norm": 0.6974540948867798, "learning_rate": 7.719516881164726e-06, "loss": 0.1072, "step": 4500 }, { "epoch": 4.185623293903548, "grad_norm": 0.8870043158531189, "learning_rate": 6.943686038836612e-06, "loss": 0.1028, "step": 4600 }, { "epoch": 4.276615104640582, "grad_norm": 0.685329020023346, "learning_rate": 6.1678551965085e-06, "loss": 0.1063, "step": 4700 }, { "epoch": 4.367606915377616, "grad_norm": 0.7879564762115479, "learning_rate": 5.392024354180386e-06, "loss": 0.0988, "step": 4800 }, { "epoch": 4.45859872611465, "grad_norm": 0.7849826216697693, "learning_rate": 4.616193511852273e-06, "loss": 0.1055, "step": 4900 }, { "epoch": 4.549590536851683, "grad_norm": 0.9511623382568359, "learning_rate": 3.840362669524161e-06, "loss": 0.0986, "step": 5000 }, { "epoch": 4.549590536851683, "eval_cer": 0.16387834285952688, "eval_loss": 0.24175503849983215, "eval_runtime": 16.3966, "eval_samples_per_second": 30.494, "eval_steps_per_second": 0.976, "eval_wer": 0.3388055476931786, "step": 5000 }, { "epoch": 4.640582347588717, "grad_norm": 1.7066700458526611, "learning_rate": 3.064531827196047e-06, "loss": 0.1091, "step": 5100 }, { "epoch": 4.731574158325751, "grad_norm": 2.1451282501220703, "learning_rate": 2.288700984867934e-06, "loss": 0.1034, "step": 5200 }, { "epoch": 4.822565969062785, "grad_norm": 1.3042198419570923, "learning_rate": 1.5128701425398207e-06, "loss": 0.1025, "step": 5300 }, { "epoch": 4.9135577797998184, "grad_norm": 0.7029935717582703, "learning_rate": 7.370393002117075e-07, "loss": 0.1028, "step": 5400 }, { "epoch": 5.0, "step": 5495, "total_flos": 4.052184710714386e+19, "train_loss": 0.1293755126064533, "train_runtime": 8966.103, "train_samples_per_second": 19.598, "train_steps_per_second": 0.613 } ], "logging_steps": 100, "max_steps": 5495, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4.052184710714386e+19, "train_batch_size": 16, "trial_name": null, "trial_params": null }