{
"best_global_step": 2000,
"best_metric": 48.24699110413396,
"best_model_checkpoint": "output/checkpoint-2000",
"epoch": 2.1691973969631237,
"eval_steps": 1000,
"global_step": 2000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.027114967462039046,
"grad_norm": 14.759312629699707,
"learning_rate": 9.600000000000001e-07,
"loss": 2.1989,
"step": 25
},
{
"epoch": 0.05422993492407809,
"grad_norm": 9.411042213439941,
"learning_rate": 1.9600000000000003e-06,
"loss": 1.9684,
"step": 50
},
{
"epoch": 0.08134490238611713,
"grad_norm": 6.554091930389404,
"learning_rate": 2.96e-06,
"loss": 1.6823,
"step": 75
},
{
"epoch": 0.10845986984815618,
"grad_norm": 4.735639572143555,
"learning_rate": 3.96e-06,
"loss": 1.5057,
"step": 100
},
{
"epoch": 0.13557483731019523,
"grad_norm": 5.951350688934326,
"learning_rate": 4.960000000000001e-06,
"loss": 1.4356,
"step": 125
},
{
"epoch": 0.16268980477223427,
"grad_norm": 7.765430450439453,
"learning_rate": 5.9600000000000005e-06,
"loss": 1.3835,
"step": 150
},
{
"epoch": 0.1898047722342733,
"grad_norm": 7.22494649887085,
"learning_rate": 6.96e-06,
"loss": 1.3296,
"step": 175
},
{
"epoch": 0.21691973969631237,
"grad_norm": 8.013534545898438,
"learning_rate": 7.960000000000002e-06,
"loss": 1.1737,
"step": 200
},
{
"epoch": 0.2440347071583514,
"grad_norm": 6.714569091796875,
"learning_rate": 8.96e-06,
"loss": 0.8834,
"step": 225
},
{
"epoch": 0.27114967462039047,
"grad_norm": 5.424898147583008,
"learning_rate": 9.960000000000001e-06,
"loss": 0.6345,
"step": 250
},
{
"epoch": 0.2982646420824295,
"grad_norm": 5.277531623840332,
"learning_rate": 1.0960000000000002e-05,
"loss": 0.5056,
"step": 275
},
{
"epoch": 0.32537960954446854,
"grad_norm": 5.730761528015137,
"learning_rate": 1.196e-05,
"loss": 0.4321,
"step": 300
},
{
"epoch": 0.3524945770065076,
"grad_norm": 6.359960556030273,
"learning_rate": 1.2960000000000001e-05,
"loss": 0.3845,
"step": 325
},
{
"epoch": 0.3796095444685466,
"grad_norm": 5.229251384735107,
"learning_rate": 1.396e-05,
"loss": 0.3523,
"step": 350
},
{
"epoch": 0.4067245119305857,
"grad_norm": 4.374991416931152,
"learning_rate": 1.496e-05,
"loss": 0.3281,
"step": 375
},
{
"epoch": 0.43383947939262474,
"grad_norm": 3.755828857421875,
"learning_rate": 1.5960000000000003e-05,
"loss": 0.3069,
"step": 400
},
{
"epoch": 0.4609544468546638,
"grad_norm": 5.2108306884765625,
"learning_rate": 1.696e-05,
"loss": 0.2993,
"step": 425
},
{
"epoch": 0.4880694143167028,
"grad_norm": 4.895330905914307,
"learning_rate": 1.796e-05,
"loss": 0.2924,
"step": 450
},
{
"epoch": 0.5151843817787418,
"grad_norm": 4.621529579162598,
"learning_rate": 1.896e-05,
"loss": 0.2796,
"step": 475
},
{
"epoch": 0.5422993492407809,
"grad_norm": 3.599890947341919,
"learning_rate": 1.9960000000000002e-05,
"loss": 0.2767,
"step": 500
},
{
"epoch": 0.56941431670282,
"grad_norm": 4.006639003753662,
"learning_rate": 1.9999327239243586e-05,
"loss": 0.2648,
"step": 525
},
{
"epoch": 0.596529284164859,
"grad_norm": 3.9036214351654053,
"learning_rate": 1.9997195761821797e-05,
"loss": 0.259,
"step": 550
},
{
"epoch": 0.6236442516268981,
"grad_norm": 3.259342908859253,
"learning_rate": 1.9993604710926203e-05,
"loss": 0.2501,
"step": 575
},
{
"epoch": 0.6507592190889371,
"grad_norm": 3.453716516494751,
"learning_rate": 1.998855461084408e-05,
"loss": 0.244,
"step": 600
},
{
"epoch": 0.6778741865509761,
"grad_norm": 3.6012680530548096,
"learning_rate": 1.9982046198881403e-05,
"loss": 0.2439,
"step": 625
},
{
"epoch": 0.7049891540130152,
"grad_norm": 3.275789260864258,
"learning_rate": 1.997408042525518e-05,
"loss": 0.2379,
"step": 650
},
{
"epoch": 0.7321041214750542,
"grad_norm": 4.458703994750977,
"learning_rate": 1.996465845295473e-05,
"loss": 0.2317,
"step": 675
},
{
"epoch": 0.7592190889370932,
"grad_norm": 3.2980549335479736,
"learning_rate": 1.9953781657571887e-05,
"loss": 0.2327,
"step": 700
},
{
"epoch": 0.7863340563991323,
"grad_norm": 3.0573136806488037,
"learning_rate": 1.9941451627100163e-05,
"loss": 0.2282,
"step": 725
},
{
"epoch": 0.8134490238611713,
"grad_norm": 3.351287364959717,
"learning_rate": 1.9927670161702906e-05,
"loss": 0.2209,
"step": 750
},
{
"epoch": 0.8405639913232104,
"grad_norm": 2.9262197017669678,
"learning_rate": 1.991243927345048e-05,
"loss": 0.2219,
"step": 775
},
{
"epoch": 0.8676789587852495,
"grad_norm": 3.440474033355713,
"learning_rate": 1.989576118602651e-05,
"loss": 0.2201,
"step": 800
},
{
"epoch": 0.8947939262472885,
"grad_norm": 2.9730112552642822,
"learning_rate": 1.987763833440322e-05,
"loss": 0.2117,
"step": 825
},
{
"epoch": 0.9219088937093276,
"grad_norm": 2.7122719287872314,
"learning_rate": 1.9858073364485933e-05,
"loss": 0.2083,
"step": 850
},
{
"epoch": 0.9490238611713666,
"grad_norm": 2.5038318634033203,
"learning_rate": 1.9837069132726775e-05,
"loss": 0.2061,
"step": 875
},
{
"epoch": 0.9761388286334056,
"grad_norm": 2.8700103759765625,
"learning_rate": 1.9814628705707643e-05,
"loss": 0.2067,
"step": 900
},
{
"epoch": 1.0032537960954446,
"grad_norm": 2.7776284217834473,
"learning_rate": 1.979075535969248e-05,
"loss": 0.1967,
"step": 925
},
{
"epoch": 1.0303687635574836,
"grad_norm": 2.9575154781341553,
"learning_rate": 1.9765452580148954e-05,
"loss": 0.1905,
"step": 950
},
{
"epoch": 1.0574837310195229,
"grad_norm": 2.7380096912384033,
"learning_rate": 1.9738724061239574e-05,
"loss": 0.1865,
"step": 975
},
{
"epoch": 1.0845986984815619,
"grad_norm": 2.5455238819122314,
"learning_rate": 1.971057370528237e-05,
"loss": 0.1855,
"step": 1000
},
{
"epoch": 1.0845986984815619,
"eval_loss": 0.19655342400074005,
"eval_runtime": 123.0419,
"eval_samples_per_second": 7.266,
"eval_steps_per_second": 0.081,
"eval_wer": 56.07012035583464,
"step": 1000
},
{
"epoch": 1.1117136659436009,
"grad_norm": 2.596179723739624,
"learning_rate": 1.9681005622181137e-05,
"loss": 0.1897,
"step": 1025
},
{
"epoch": 1.13882863340564,
"grad_norm": 2.540958881378174,
"learning_rate": 1.9650024128825406e-05,
"loss": 0.1833,
"step": 1050
},
{
"epoch": 1.165943600867679,
"grad_norm": 2.2400152683258057,
"learning_rate": 1.9617633748460193e-05,
"loss": 0.1874,
"step": 1075
},
{
"epoch": 1.1930585683297181,
"grad_norm": 2.717508316040039,
"learning_rate": 1.958383921002561e-05,
"loss": 0.1854,
"step": 1100
},
{
"epoch": 1.2201735357917571,
"grad_norm": 2.3038101196289062,
"learning_rate": 1.9548645447466433e-05,
"loss": 0.18,
"step": 1125
},
{
"epoch": 1.2472885032537961,
"grad_norm": 2.39315128326416,
"learning_rate": 1.951205759901177e-05,
"loss": 0.1823,
"step": 1150
},
{
"epoch": 1.2744034707158352,
"grad_norm": 2.5433220863342285,
"learning_rate": 1.947408100642489e-05,
"loss": 0.1764,
"step": 1175
},
{
"epoch": 1.3015184381778742,
"grad_norm": 2.687659740447998,
"learning_rate": 1.943472121422332e-05,
"loss": 0.177,
"step": 1200
},
{
"epoch": 1.3286334056399132,
"grad_norm": 2.7993059158325195,
"learning_rate": 1.939398396886937e-05,
"loss": 0.1825,
"step": 1225
},
{
"epoch": 1.3557483731019522,
"grad_norm": 2.1816444396972656,
"learning_rate": 1.9351875217931154e-05,
"loss": 0.1717,
"step": 1250
},
{
"epoch": 1.3828633405639914,
"grad_norm": 2.8464884757995605,
"learning_rate": 1.930840110921425e-05,
"loss": 0.1719,
"step": 1275
},
{
"epoch": 1.4099783080260304,
"grad_norm": 2.4136722087860107,
"learning_rate": 1.9263567989864135e-05,
"loss": 0.1673,
"step": 1300
},
{
"epoch": 1.4370932754880694,
"grad_norm": 2.15059757232666,
"learning_rate": 1.921738240543951e-05,
"loss": 0.1723,
"step": 1325
},
{
"epoch": 1.4642082429501084,
"grad_norm": 2.8437695503234863,
"learning_rate": 1.916985109895668e-05,
"loss": 0.1736,
"step": 1350
},
{
"epoch": 1.4913232104121474,
"grad_norm": 2.24259090423584,
"learning_rate": 1.9120981009905044e-05,
"loss": 0.168,
"step": 1375
},
{
"epoch": 1.5184381778741867,
"grad_norm": 2.81048583984375,
"learning_rate": 1.907077927323398e-05,
"loss": 0.1728,
"step": 1400
},
{
"epoch": 1.5455531453362257,
"grad_norm": 2.333526372909546,
"learning_rate": 1.901925321831114e-05,
"loss": 0.1684,
"step": 1425
},
{
"epoch": 1.5726681127982647,
"grad_norm": 2.995264768600464,
"learning_rate": 1.896641036785236e-05,
"loss": 0.1637,
"step": 1450
},
{
"epoch": 1.5997830802603037,
"grad_norm": 2.2448666095733643,
"learning_rate": 1.891225843682339e-05,
"loss": 0.161,
"step": 1475
},
{
"epoch": 1.6268980477223427,
"grad_norm": 2.1202893257141113,
"learning_rate": 1.8856805331313487e-05,
"loss": 0.1662,
"step": 1500
},
{
"epoch": 1.6540130151843817,
"grad_norm": 2.316800594329834,
"learning_rate": 1.8800059147381172e-05,
"loss": 0.1664,
"step": 1525
},
{
"epoch": 1.6811279826464207,
"grad_norm": 2.7500040531158447,
"learning_rate": 1.8742028169872188e-05,
"loss": 0.1564,
"step": 1550
},
{
"epoch": 1.7082429501084597,
"grad_norm": 2.1926183700561523,
"learning_rate": 1.868272087120995e-05,
"loss": 0.1608,
"step": 1575
},
{
"epoch": 1.735357917570499,
"grad_norm": 2.438418388366699,
"learning_rate": 1.8622145910158568e-05,
"loss": 0.1666,
"step": 1600
},
{
"epoch": 1.762472885032538,
"grad_norm": 2.249562978744507,
"learning_rate": 1.8560312130558706e-05,
"loss": 0.1613,
"step": 1625
},
{
"epoch": 1.789587852494577,
"grad_norm": 2.3808553218841553,
"learning_rate": 1.849722856003637e-05,
"loss": 0.1591,
"step": 1650
},
{
"epoch": 1.8167028199566162,
"grad_norm": 2.087522029876709,
"learning_rate": 1.8432904408684912e-05,
"loss": 0.1569,
"step": 1675
},
{
"epoch": 1.8438177874186552,
"grad_norm": 2.3600339889526367,
"learning_rate": 1.836734906772035e-05,
"loss": 0.1575,
"step": 1700
},
{
"epoch": 1.8709327548806942,
"grad_norm": 2.239959239959717,
"learning_rate": 1.8300572108110287e-05,
"loss": 0.1578,
"step": 1725
},
{
"epoch": 1.8980477223427332,
"grad_norm": 2.024038553237915,
"learning_rate": 1.823258327917656e-05,
"loss": 0.1575,
"step": 1750
},
{
"epoch": 1.9251626898047722,
"grad_norm": 2.0424182415008545,
"learning_rate": 1.816339250717184e-05,
"loss": 0.1603,
"step": 1775
},
{
"epoch": 1.9522776572668112,
"grad_norm": 2.4677207469940186,
"learning_rate": 1.809300989383045e-05,
"loss": 0.1531,
"step": 1800
},
{
"epoch": 1.9793926247288502,
"grad_norm": 1.9991437196731567,
"learning_rate": 1.802144571489349e-05,
"loss": 0.1519,
"step": 1825
},
{
"epoch": 2.0065075921908893,
"grad_norm": 2.2862942218780518,
"learning_rate": 1.7948710418608626e-05,
"loss": 0.1476,
"step": 1850
},
{
"epoch": 2.0336225596529283,
"grad_norm": 2.52840256690979,
"learning_rate": 1.787481462420465e-05,
"loss": 0.1376,
"step": 1875
},
{
"epoch": 2.0607375271149673,
"grad_norm": 2.4488773345947266,
"learning_rate": 1.779976912034109e-05,
"loss": 0.1368,
"step": 1900
},
{
"epoch": 2.0878524945770067,
"grad_norm": 2.1182641983032227,
"learning_rate": 1.772358486353309e-05,
"loss": 0.1395,
"step": 1925
},
{
"epoch": 2.1149674620390457,
"grad_norm": 2.1353209018707275,
"learning_rate": 1.764627297655178e-05,
"loss": 0.1356,
"step": 1950
},
{
"epoch": 2.1420824295010847,
"grad_norm": 2.5639116764068604,
"learning_rate": 1.756784474680036e-05,
"loss": 0.1405,
"step": 1975
},
{
"epoch": 2.1691973969631237,
"grad_norm": 2.4028944969177246,
"learning_rate": 1.7488311624666165e-05,
"loss": 0.1379,
"step": 2000
},
{
"epoch": 2.1691973969631237,
"eval_loss": 0.15594108402729034,
"eval_runtime": 122.8675,
"eval_samples_per_second": 7.276,
"eval_steps_per_second": 0.081,
"eval_wer": 48.24699110413396,
"step": 2000
}
],
"logging_steps": 25,
"max_steps": 7000,
"num_input_tokens_seen": 0,
"num_train_epochs": 8,
"save_steps": 1000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1.244313082331136e+19,
"train_batch_size": 96,
"trial_name": null,
"trial_params": null
}