| { |
| "best_global_step": 2000, |
| "best_metric": 48.24699110413396, |
| "best_model_checkpoint": "output/checkpoint-2000", |
| "epoch": 2.1691973969631237, |
| "eval_steps": 1000, |
| "global_step": 2000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.027114967462039046, |
| "grad_norm": 14.759312629699707, |
| "learning_rate": 9.600000000000001e-07, |
| "loss": 2.1989, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.05422993492407809, |
| "grad_norm": 9.411042213439941, |
| "learning_rate": 1.9600000000000003e-06, |
| "loss": 1.9684, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.08134490238611713, |
| "grad_norm": 6.554091930389404, |
| "learning_rate": 2.96e-06, |
| "loss": 1.6823, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.10845986984815618, |
| "grad_norm": 4.735639572143555, |
| "learning_rate": 3.96e-06, |
| "loss": 1.5057, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.13557483731019523, |
| "grad_norm": 5.951350688934326, |
| "learning_rate": 4.960000000000001e-06, |
| "loss": 1.4356, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.16268980477223427, |
| "grad_norm": 7.765430450439453, |
| "learning_rate": 5.9600000000000005e-06, |
| "loss": 1.3835, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.1898047722342733, |
| "grad_norm": 7.22494649887085, |
| "learning_rate": 6.96e-06, |
| "loss": 1.3296, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.21691973969631237, |
| "grad_norm": 8.013534545898438, |
| "learning_rate": 7.960000000000002e-06, |
| "loss": 1.1737, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.2440347071583514, |
| "grad_norm": 6.714569091796875, |
| "learning_rate": 8.96e-06, |
| "loss": 0.8834, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.27114967462039047, |
| "grad_norm": 5.424898147583008, |
| "learning_rate": 9.960000000000001e-06, |
| "loss": 0.6345, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.2982646420824295, |
| "grad_norm": 5.277531623840332, |
| "learning_rate": 1.0960000000000002e-05, |
| "loss": 0.5056, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.32537960954446854, |
| "grad_norm": 5.730761528015137, |
| "learning_rate": 1.196e-05, |
| "loss": 0.4321, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.3524945770065076, |
| "grad_norm": 6.359960556030273, |
| "learning_rate": 1.2960000000000001e-05, |
| "loss": 0.3845, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.3796095444685466, |
| "grad_norm": 5.229251384735107, |
| "learning_rate": 1.396e-05, |
| "loss": 0.3523, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.4067245119305857, |
| "grad_norm": 4.374991416931152, |
| "learning_rate": 1.496e-05, |
| "loss": 0.3281, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.43383947939262474, |
| "grad_norm": 3.755828857421875, |
| "learning_rate": 1.5960000000000003e-05, |
| "loss": 0.3069, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.4609544468546638, |
| "grad_norm": 5.2108306884765625, |
| "learning_rate": 1.696e-05, |
| "loss": 0.2993, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.4880694143167028, |
| "grad_norm": 4.895330905914307, |
| "learning_rate": 1.796e-05, |
| "loss": 0.2924, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.5151843817787418, |
| "grad_norm": 4.621529579162598, |
| "learning_rate": 1.896e-05, |
| "loss": 0.2796, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.5422993492407809, |
| "grad_norm": 3.599890947341919, |
| "learning_rate": 1.9960000000000002e-05, |
| "loss": 0.2767, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.56941431670282, |
| "grad_norm": 4.006639003753662, |
| "learning_rate": 1.9999327239243586e-05, |
| "loss": 0.2648, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.596529284164859, |
| "grad_norm": 3.9036214351654053, |
| "learning_rate": 1.9997195761821797e-05, |
| "loss": 0.259, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.6236442516268981, |
| "grad_norm": 3.259342908859253, |
| "learning_rate": 1.9993604710926203e-05, |
| "loss": 0.2501, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.6507592190889371, |
| "grad_norm": 3.453716516494751, |
| "learning_rate": 1.998855461084408e-05, |
| "loss": 0.244, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.6778741865509761, |
| "grad_norm": 3.6012680530548096, |
| "learning_rate": 1.9982046198881403e-05, |
| "loss": 0.2439, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.7049891540130152, |
| "grad_norm": 3.275789260864258, |
| "learning_rate": 1.997408042525518e-05, |
| "loss": 0.2379, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.7321041214750542, |
| "grad_norm": 4.458703994750977, |
| "learning_rate": 1.996465845295473e-05, |
| "loss": 0.2317, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.7592190889370932, |
| "grad_norm": 3.2980549335479736, |
| "learning_rate": 1.9953781657571887e-05, |
| "loss": 0.2327, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.7863340563991323, |
| "grad_norm": 3.0573136806488037, |
| "learning_rate": 1.9941451627100163e-05, |
| "loss": 0.2282, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.8134490238611713, |
| "grad_norm": 3.351287364959717, |
| "learning_rate": 1.9927670161702906e-05, |
| "loss": 0.2209, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.8405639913232104, |
| "grad_norm": 2.9262197017669678, |
| "learning_rate": 1.991243927345048e-05, |
| "loss": 0.2219, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.8676789587852495, |
| "grad_norm": 3.440474033355713, |
| "learning_rate": 1.989576118602651e-05, |
| "loss": 0.2201, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.8947939262472885, |
| "grad_norm": 2.9730112552642822, |
| "learning_rate": 1.987763833440322e-05, |
| "loss": 0.2117, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.9219088937093276, |
| "grad_norm": 2.7122719287872314, |
| "learning_rate": 1.9858073364485933e-05, |
| "loss": 0.2083, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.9490238611713666, |
| "grad_norm": 2.5038318634033203, |
| "learning_rate": 1.9837069132726775e-05, |
| "loss": 0.2061, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.9761388286334056, |
| "grad_norm": 2.8700103759765625, |
| "learning_rate": 1.9814628705707643e-05, |
| "loss": 0.2067, |
| "step": 900 |
| }, |
| { |
| "epoch": 1.0032537960954446, |
| "grad_norm": 2.7776284217834473, |
| "learning_rate": 1.979075535969248e-05, |
| "loss": 0.1967, |
| "step": 925 |
| }, |
| { |
| "epoch": 1.0303687635574836, |
| "grad_norm": 2.9575154781341553, |
| "learning_rate": 1.9765452580148954e-05, |
| "loss": 0.1905, |
| "step": 950 |
| }, |
| { |
| "epoch": 1.0574837310195229, |
| "grad_norm": 2.7380096912384033, |
| "learning_rate": 1.9738724061239574e-05, |
| "loss": 0.1865, |
| "step": 975 |
| }, |
| { |
| "epoch": 1.0845986984815619, |
| "grad_norm": 2.5455238819122314, |
| "learning_rate": 1.971057370528237e-05, |
| "loss": 0.1855, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.0845986984815619, |
| "eval_loss": 0.19655342400074005, |
| "eval_runtime": 123.0419, |
| "eval_samples_per_second": 7.266, |
| "eval_steps_per_second": 0.081, |
| "eval_wer": 56.07012035583464, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.1117136659436009, |
| "grad_norm": 2.596179723739624, |
| "learning_rate": 1.9681005622181137e-05, |
| "loss": 0.1897, |
| "step": 1025 |
| }, |
| { |
| "epoch": 1.13882863340564, |
| "grad_norm": 2.540958881378174, |
| "learning_rate": 1.9650024128825406e-05, |
| "loss": 0.1833, |
| "step": 1050 |
| }, |
| { |
| "epoch": 1.165943600867679, |
| "grad_norm": 2.2400152683258057, |
| "learning_rate": 1.9617633748460193e-05, |
| "loss": 0.1874, |
| "step": 1075 |
| }, |
| { |
| "epoch": 1.1930585683297181, |
| "grad_norm": 2.717508316040039, |
| "learning_rate": 1.958383921002561e-05, |
| "loss": 0.1854, |
| "step": 1100 |
| }, |
| { |
| "epoch": 1.2201735357917571, |
| "grad_norm": 2.3038101196289062, |
| "learning_rate": 1.9548645447466433e-05, |
| "loss": 0.18, |
| "step": 1125 |
| }, |
| { |
| "epoch": 1.2472885032537961, |
| "grad_norm": 2.39315128326416, |
| "learning_rate": 1.951205759901177e-05, |
| "loss": 0.1823, |
| "step": 1150 |
| }, |
| { |
| "epoch": 1.2744034707158352, |
| "grad_norm": 2.5433220863342285, |
| "learning_rate": 1.947408100642489e-05, |
| "loss": 0.1764, |
| "step": 1175 |
| }, |
| { |
| "epoch": 1.3015184381778742, |
| "grad_norm": 2.687659740447998, |
| "learning_rate": 1.943472121422332e-05, |
| "loss": 0.177, |
| "step": 1200 |
| }, |
| { |
| "epoch": 1.3286334056399132, |
| "grad_norm": 2.7993059158325195, |
| "learning_rate": 1.939398396886937e-05, |
| "loss": 0.1825, |
| "step": 1225 |
| }, |
| { |
| "epoch": 1.3557483731019522, |
| "grad_norm": 2.1816444396972656, |
| "learning_rate": 1.9351875217931154e-05, |
| "loss": 0.1717, |
| "step": 1250 |
| }, |
| { |
| "epoch": 1.3828633405639914, |
| "grad_norm": 2.8464884757995605, |
| "learning_rate": 1.930840110921425e-05, |
| "loss": 0.1719, |
| "step": 1275 |
| }, |
| { |
| "epoch": 1.4099783080260304, |
| "grad_norm": 2.4136722087860107, |
| "learning_rate": 1.9263567989864135e-05, |
| "loss": 0.1673, |
| "step": 1300 |
| }, |
| { |
| "epoch": 1.4370932754880694, |
| "grad_norm": 2.15059757232666, |
| "learning_rate": 1.921738240543951e-05, |
| "loss": 0.1723, |
| "step": 1325 |
| }, |
| { |
| "epoch": 1.4642082429501084, |
| "grad_norm": 2.8437695503234863, |
| "learning_rate": 1.916985109895668e-05, |
| "loss": 0.1736, |
| "step": 1350 |
| }, |
| { |
| "epoch": 1.4913232104121474, |
| "grad_norm": 2.24259090423584, |
| "learning_rate": 1.9120981009905044e-05, |
| "loss": 0.168, |
| "step": 1375 |
| }, |
| { |
| "epoch": 1.5184381778741867, |
| "grad_norm": 2.81048583984375, |
| "learning_rate": 1.907077927323398e-05, |
| "loss": 0.1728, |
| "step": 1400 |
| }, |
| { |
| "epoch": 1.5455531453362257, |
| "grad_norm": 2.333526372909546, |
| "learning_rate": 1.901925321831114e-05, |
| "loss": 0.1684, |
| "step": 1425 |
| }, |
| { |
| "epoch": 1.5726681127982647, |
| "grad_norm": 2.995264768600464, |
| "learning_rate": 1.896641036785236e-05, |
| "loss": 0.1637, |
| "step": 1450 |
| }, |
| { |
| "epoch": 1.5997830802603037, |
| "grad_norm": 2.2448666095733643, |
| "learning_rate": 1.891225843682339e-05, |
| "loss": 0.161, |
| "step": 1475 |
| }, |
| { |
| "epoch": 1.6268980477223427, |
| "grad_norm": 2.1202893257141113, |
| "learning_rate": 1.8856805331313487e-05, |
| "loss": 0.1662, |
| "step": 1500 |
| }, |
| { |
| "epoch": 1.6540130151843817, |
| "grad_norm": 2.316800594329834, |
| "learning_rate": 1.8800059147381172e-05, |
| "loss": 0.1664, |
| "step": 1525 |
| }, |
| { |
| "epoch": 1.6811279826464207, |
| "grad_norm": 2.7500040531158447, |
| "learning_rate": 1.8742028169872188e-05, |
| "loss": 0.1564, |
| "step": 1550 |
| }, |
| { |
| "epoch": 1.7082429501084597, |
| "grad_norm": 2.1926183700561523, |
| "learning_rate": 1.868272087120995e-05, |
| "loss": 0.1608, |
| "step": 1575 |
| }, |
| { |
| "epoch": 1.735357917570499, |
| "grad_norm": 2.438418388366699, |
| "learning_rate": 1.8622145910158568e-05, |
| "loss": 0.1666, |
| "step": 1600 |
| }, |
| { |
| "epoch": 1.762472885032538, |
| "grad_norm": 2.249562978744507, |
| "learning_rate": 1.8560312130558706e-05, |
| "loss": 0.1613, |
| "step": 1625 |
| }, |
| { |
| "epoch": 1.789587852494577, |
| "grad_norm": 2.3808553218841553, |
| "learning_rate": 1.849722856003637e-05, |
| "loss": 0.1591, |
| "step": 1650 |
| }, |
| { |
| "epoch": 1.8167028199566162, |
| "grad_norm": 2.087522029876709, |
| "learning_rate": 1.8432904408684912e-05, |
| "loss": 0.1569, |
| "step": 1675 |
| }, |
| { |
| "epoch": 1.8438177874186552, |
| "grad_norm": 2.3600339889526367, |
| "learning_rate": 1.836734906772035e-05, |
| "loss": 0.1575, |
| "step": 1700 |
| }, |
| { |
| "epoch": 1.8709327548806942, |
| "grad_norm": 2.239959239959717, |
| "learning_rate": 1.8300572108110287e-05, |
| "loss": 0.1578, |
| "step": 1725 |
| }, |
| { |
| "epoch": 1.8980477223427332, |
| "grad_norm": 2.024038553237915, |
| "learning_rate": 1.823258327917656e-05, |
| "loss": 0.1575, |
| "step": 1750 |
| }, |
| { |
| "epoch": 1.9251626898047722, |
| "grad_norm": 2.0424182415008545, |
| "learning_rate": 1.816339250717184e-05, |
| "loss": 0.1603, |
| "step": 1775 |
| }, |
| { |
| "epoch": 1.9522776572668112, |
| "grad_norm": 2.4677207469940186, |
| "learning_rate": 1.809300989383045e-05, |
| "loss": 0.1531, |
| "step": 1800 |
| }, |
| { |
| "epoch": 1.9793926247288502, |
| "grad_norm": 1.9991437196731567, |
| "learning_rate": 1.802144571489349e-05, |
| "loss": 0.1519, |
| "step": 1825 |
| }, |
| { |
| "epoch": 2.0065075921908893, |
| "grad_norm": 2.2862942218780518, |
| "learning_rate": 1.7948710418608626e-05, |
| "loss": 0.1476, |
| "step": 1850 |
| }, |
| { |
| "epoch": 2.0336225596529283, |
| "grad_norm": 2.52840256690979, |
| "learning_rate": 1.787481462420465e-05, |
| "loss": 0.1376, |
| "step": 1875 |
| }, |
| { |
| "epoch": 2.0607375271149673, |
| "grad_norm": 2.4488773345947266, |
| "learning_rate": 1.779976912034109e-05, |
| "loss": 0.1368, |
| "step": 1900 |
| }, |
| { |
| "epoch": 2.0878524945770067, |
| "grad_norm": 2.1182641983032227, |
| "learning_rate": 1.772358486353309e-05, |
| "loss": 0.1395, |
| "step": 1925 |
| }, |
| { |
| "epoch": 2.1149674620390457, |
| "grad_norm": 2.1353209018707275, |
| "learning_rate": 1.764627297655178e-05, |
| "loss": 0.1356, |
| "step": 1950 |
| }, |
| { |
| "epoch": 2.1420824295010847, |
| "grad_norm": 2.5639116764068604, |
| "learning_rate": 1.756784474680036e-05, |
| "loss": 0.1405, |
| "step": 1975 |
| }, |
| { |
| "epoch": 2.1691973969631237, |
| "grad_norm": 2.4028944969177246, |
| "learning_rate": 1.7488311624666165e-05, |
| "loss": 0.1379, |
| "step": 2000 |
| }, |
| { |
| "epoch": 2.1691973969631237, |
| "eval_loss": 0.15594108402729034, |
| "eval_runtime": 122.8675, |
| "eval_samples_per_second": 7.276, |
| "eval_steps_per_second": 0.081, |
| "eval_wer": 48.24699110413396, |
| "step": 2000 |
| } |
| ], |
| "logging_steps": 25, |
| "max_steps": 7000, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 8, |
| "save_steps": 1000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.244313082331136e+19, |
| "train_batch_size": 96, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|