{ "best_metric": null, "best_model_checkpoint": null, "epoch": 5.0, "eval_steps": 1000.0, "global_step": 5495, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.09099181073703366, "grad_norm": 1.2458827495574951, "learning_rate": 7.101063243757922e-06, "loss": 0.1615, "step": 100 }, { "epoch": 0.18198362147406733, "grad_norm": 1.1883313655853271, "learning_rate": 1.3026593668101051e-05, "loss": 0.1645, "step": 200 }, { "epoch": 0.272975432211101, "grad_norm": 1.536879539489746, "learning_rate": 1.2780576790516517e-05, "loss": 0.1829, "step": 300 }, { "epoch": 0.36396724294813465, "grad_norm": 1.4138495922088623, "learning_rate": 1.2534559912931985e-05, "loss": 0.1856, "step": 400 }, { "epoch": 0.4549590536851683, "grad_norm": 1.4161380529403687, "learning_rate": 1.2288543035347451e-05, "loss": 0.1883, "step": 500 }, { "epoch": 0.545950864422202, "grad_norm": 0.7464113235473633, "learning_rate": 1.2042526157762917e-05, "loss": 0.1852, "step": 600 }, { "epoch": 0.6369426751592356, "grad_norm": 1.0659712553024292, "learning_rate": 1.1796509280178383e-05, "loss": 0.175, "step": 700 }, { "epoch": 0.7279344858962693, "grad_norm": 0.9812346696853638, "learning_rate": 1.155049240259385e-05, "loss": 0.1738, "step": 800 }, { "epoch": 0.818926296633303, "grad_norm": 0.6952521204948425, "learning_rate": 1.1304475525009316e-05, "loss": 0.1648, "step": 900 }, { "epoch": 0.9099181073703366, "grad_norm": 1.3266983032226562, "learning_rate": 1.1058458647424782e-05, "loss": 0.1695, "step": 1000 }, { "epoch": 0.9099181073703366, "eval_cer": 0.17138804288873127, "eval_loss": 0.24572527408599854, "eval_runtime": 16.2175, "eval_samples_per_second": 30.831, "eval_steps_per_second": 0.987, "eval_wer": 0.37248797056326066, "step": 1000 }, { "epoch": 1.0009099181073704, "grad_norm": 0.9983859658241272, "learning_rate": 1.081244176984025e-05, "loss": 0.1606, "step": 1100 }, { "epoch": 1.091901728844404, "grad_norm": 0.9436022639274597, "learning_rate": 1.0566424892255716e-05, "loss": 0.1479, "step": 1200 }, { "epoch": 1.1828935395814377, "grad_norm": 1.3033909797668457, "learning_rate": 1.0320408014671182e-05, "loss": 0.1526, "step": 1300 }, { "epoch": 1.2738853503184713, "grad_norm": 1.919601559638977, "learning_rate": 1.007439113708665e-05, "loss": 0.1549, "step": 1400 }, { "epoch": 1.364877161055505, "grad_norm": 2.1733407974243164, "learning_rate": 9.828374259502114e-06, "loss": 0.1538, "step": 1500 }, { "epoch": 1.4558689717925386, "grad_norm": 0.9706993699073792, "learning_rate": 9.582357381917582e-06, "loss": 0.1447, "step": 1600 }, { "epoch": 1.5468607825295724, "grad_norm": 2.0929415225982666, "learning_rate": 9.33634050433305e-06, "loss": 0.1513, "step": 1700 }, { "epoch": 1.6378525932666061, "grad_norm": 1.7381020784378052, "learning_rate": 9.090323626748514e-06, "loss": 0.1512, "step": 1800 }, { "epoch": 1.7288444040036397, "grad_norm": 4.781813621520996, "learning_rate": 8.844306749163981e-06, "loss": 0.157, "step": 1900 }, { "epoch": 1.8198362147406733, "grad_norm": 1.9795459508895874, "learning_rate": 8.598289871579447e-06, "loss": 0.1502, "step": 2000 }, { "epoch": 1.8198362147406733, "eval_cer": 0.170386749551504, "eval_loss": 0.23442533612251282, "eval_runtime": 16.2106, "eval_samples_per_second": 30.844, "eval_steps_per_second": 0.987, "eval_wer": 0.36399660345315593, "step": 2000 }, { "epoch": 1.910828025477707, "grad_norm": 1.2681164741516113, "learning_rate": 8.352272993994913e-06, "loss": 0.1512, "step": 2100 }, { "epoch": 2.001819836214741, "grad_norm": 0.8989790081977844, "learning_rate": 8.106256116410381e-06, "loss": 0.1498, "step": 2200 }, { "epoch": 2.092811646951774, "grad_norm": 2.716948986053467, "learning_rate": 7.860239238825847e-06, "loss": 0.1379, "step": 2300 }, { "epoch": 2.183803457688808, "grad_norm": 1.0314233303070068, "learning_rate": 7.614222361241313e-06, "loss": 0.1375, "step": 2400 }, { "epoch": 2.2747952684258417, "grad_norm": 1.034842848777771, "learning_rate": 7.3682054836567806e-06, "loss": 0.1424, "step": 2500 }, { "epoch": 2.3657870791628755, "grad_norm": 1.1963162422180176, "learning_rate": 7.122188606072246e-06, "loss": 0.1371, "step": 2600 }, { "epoch": 2.4567788898999092, "grad_norm": 1.1049108505249023, "learning_rate": 6.8761717284877125e-06, "loss": 0.1346, "step": 2700 }, { "epoch": 2.5477707006369426, "grad_norm": 1.5126423835754395, "learning_rate": 6.630154850903179e-06, "loss": 0.1312, "step": 2800 }, { "epoch": 2.6387625113739763, "grad_norm": 1.445042610168457, "learning_rate": 6.384137973318645e-06, "loss": 0.1236, "step": 2900 }, { "epoch": 2.72975432211101, "grad_norm": 0.6577618718147278, "learning_rate": 6.138121095734112e-06, "loss": 0.1287, "step": 3000 }, { "epoch": 2.72975432211101, "eval_cer": 0.16905169176853435, "eval_loss": 0.2321455329656601, "eval_runtime": 16.3024, "eval_samples_per_second": 30.67, "eval_steps_per_second": 0.981, "eval_wer": 0.36173223889046136, "step": 3000 }, { "epoch": 2.8207461328480434, "grad_norm": 0.7591854929924011, "learning_rate": 5.892104218149578e-06, "loss": 0.1398, "step": 3100 }, { "epoch": 2.911737943585077, "grad_norm": 0.9334970116615295, "learning_rate": 5.646087340565045e-06, "loss": 0.1333, "step": 3200 }, { "epoch": 3.002729754322111, "grad_norm": 1.2228902578353882, "learning_rate": 5.400070462980511e-06, "loss": 0.1339, "step": 3300 }, { "epoch": 3.0937215650591448, "grad_norm": 0.6372168064117432, "learning_rate": 5.154053585395978e-06, "loss": 0.1219, "step": 3400 }, { "epoch": 3.1847133757961785, "grad_norm": 1.18144690990448, "learning_rate": 4.9080367078114446e-06, "loss": 0.1253, "step": 3500 }, { "epoch": 3.275705186533212, "grad_norm": 0.8614761829376221, "learning_rate": 4.6620198302269105e-06, "loss": 0.1202, "step": 3600 }, { "epoch": 3.3666969972702456, "grad_norm": 1.8885802030563354, "learning_rate": 4.4160029526423765e-06, "loss": 0.1225, "step": 3700 }, { "epoch": 3.4576888080072794, "grad_norm": 0.8055128455162048, "learning_rate": 4.169986075057844e-06, "loss": 0.1288, "step": 3800 }, { "epoch": 3.548680618744313, "grad_norm": 0.6375197172164917, "learning_rate": 3.92396919747331e-06, "loss": 0.1291, "step": 3900 }, { "epoch": 3.6396724294813465, "grad_norm": 0.9885872602462769, "learning_rate": 3.6779523198887766e-06, "loss": 0.1248, "step": 4000 }, { "epoch": 3.6396724294813465, "eval_cer": 0.16650673786974843, "eval_loss": 0.2386896312236786, "eval_runtime": 16.2576, "eval_samples_per_second": 30.755, "eval_steps_per_second": 0.984, "eval_wer": 0.3560713274837249, "step": 4000 }, { "epoch": 3.7306642402183803, "grad_norm": 1.435037612915039, "learning_rate": 3.4319354423042425e-06, "loss": 0.1191, "step": 4100 }, { "epoch": 3.821656050955414, "grad_norm": 1.0509116649627686, "learning_rate": 3.1859185647197094e-06, "loss": 0.1144, "step": 4200 }, { "epoch": 3.912647861692448, "grad_norm": 1.5290184020996094, "learning_rate": 2.9399016871351758e-06, "loss": 0.1244, "step": 4300 }, { "epoch": 4.003639672429482, "grad_norm": 1.2121403217315674, "learning_rate": 2.693884809550642e-06, "loss": 0.1161, "step": 4400 }, { "epoch": 4.094631483166515, "grad_norm": 1.786240816116333, "learning_rate": 2.447867931966109e-06, "loss": 0.1209, "step": 4500 }, { "epoch": 4.185623293903548, "grad_norm": 1.2012931108474731, "learning_rate": 2.201851054381575e-06, "loss": 0.1176, "step": 4600 }, { "epoch": 4.276615104640582, "grad_norm": 0.8693597316741943, "learning_rate": 1.9558341767970418e-06, "loss": 0.1208, "step": 4700 }, { "epoch": 4.367606915377616, "grad_norm": 1.355154037475586, "learning_rate": 1.709817299212508e-06, "loss": 0.1119, "step": 4800 }, { "epoch": 4.45859872611465, "grad_norm": 1.7125566005706787, "learning_rate": 1.4638004216279746e-06, "loss": 0.1229, "step": 4900 }, { "epoch": 4.549590536851683, "grad_norm": 0.8439257144927979, "learning_rate": 1.2177835440434412e-06, "loss": 0.1127, "step": 5000 }, { "epoch": 4.549590536851683, "eval_cer": 0.16671534064833743, "eval_loss": 0.23713621497154236, "eval_runtime": 16.2282, "eval_samples_per_second": 30.811, "eval_steps_per_second": 0.986, "eval_wer": 0.3492782337956411, "step": 5000 }, { "epoch": 4.640582347588717, "grad_norm": 1.417340636253357, "learning_rate": 9.717666664589076e-07, "loss": 0.1241, "step": 5100 }, { "epoch": 4.731574158325751, "grad_norm": 2.014954090118408, "learning_rate": 7.25749788874374e-07, "loss": 0.1165, "step": 5200 }, { "epoch": 4.822565969062785, "grad_norm": 1.0176433324813843, "learning_rate": 4.797329112898405e-07, "loss": 0.1195, "step": 5300 }, { "epoch": 4.9135577797998184, "grad_norm": 1.0092387199401855, "learning_rate": 2.3371603370530685e-07, "loss": 0.117, "step": 5400 }, { "epoch": 5.0, "step": 5495, "total_flos": 4.052184710714386e+19, "train_loss": 0.13981490825066467, "train_runtime": 8884.6541, "train_samples_per_second": 19.778, "train_steps_per_second": 0.618 } ], "logging_steps": 100, "max_steps": 5495, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4.052184710714386e+19, "train_batch_size": 16, "trial_name": null, "trial_params": null }