| { | |
| "best_metric": 0.6916529645033369, | |
| "best_model_checkpoint": "d:\\DataTicon\\Whisper-Khmer-Small\\whisper-khmer\\outputs\\whisper-khmer-tiny\\checkpoint-3000", | |
| "epoch": 2.7447392497712717, | |
| "eval_steps": 500, | |
| "global_step": 3000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.009149130832570906, | |
| "grad_norm": 76.3263931274414, | |
| "learning_rate": 5e-06, | |
| "loss": 3.7253, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.018298261665141813, | |
| "grad_norm": 23.748031616210938, | |
| "learning_rate": 1.5e-05, | |
| "loss": 2.8873, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.027447392497712716, | |
| "grad_norm": 17.664098739624023, | |
| "learning_rate": 2.5e-05, | |
| "loss": 2.2375, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.036596523330283626, | |
| "grad_norm": 12.52354907989502, | |
| "learning_rate": 3.5e-05, | |
| "loss": 1.8201, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.04574565416285453, | |
| "grad_norm": 27.447662353515625, | |
| "learning_rate": 4.5e-05, | |
| "loss": 1.6313, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.05489478499542543, | |
| "grad_norm": 24.413909912109375, | |
| "learning_rate": 4.992257664911737e-05, | |
| "loss": 1.6002, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.06404391582799634, | |
| "grad_norm": 21.101381301879883, | |
| "learning_rate": 4.976772994735213e-05, | |
| "loss": 1.5404, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.07319304666056725, | |
| "grad_norm": 14.289061546325684, | |
| "learning_rate": 4.961288324558687e-05, | |
| "loss": 1.4752, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.08234217749313816, | |
| "grad_norm": 14.361418724060059, | |
| "learning_rate": 4.945803654382162e-05, | |
| "loss": 1.4342, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.09149130832570906, | |
| "grad_norm": 10.036293983459473, | |
| "learning_rate": 4.930318984205637e-05, | |
| "loss": 1.3983, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.10064043915827996, | |
| "grad_norm": 14.588384628295898, | |
| "learning_rate": 4.9148343140291115e-05, | |
| "loss": 1.3811, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.10978956999085086, | |
| "grad_norm": 11.769558906555176, | |
| "learning_rate": 4.899349643852586e-05, | |
| "loss": 1.3395, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.11893870082342177, | |
| "grad_norm": 15.153656959533691, | |
| "learning_rate": 4.883864973676061e-05, | |
| "loss": 1.3043, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.1280878316559927, | |
| "grad_norm": 10.691374778747559, | |
| "learning_rate": 4.8683803034995354e-05, | |
| "loss": 1.2971, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.1372369624885636, | |
| "grad_norm": 18.516103744506836, | |
| "learning_rate": 4.85289563332301e-05, | |
| "loss": 1.2716, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.1463860933211345, | |
| "grad_norm": 10.616347312927246, | |
| "learning_rate": 4.837410963146485e-05, | |
| "loss": 1.2456, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.1555352241537054, | |
| "grad_norm": 14.87991714477539, | |
| "learning_rate": 4.82192629296996e-05, | |
| "loss": 1.2224, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.16468435498627632, | |
| "grad_norm": 19.040470123291016, | |
| "learning_rate": 4.806441622793435e-05, | |
| "loss": 1.2364, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.17383348581884722, | |
| "grad_norm": 18.312976837158203, | |
| "learning_rate": 4.7909569526169096e-05, | |
| "loss": 1.1906, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.18298261665141813, | |
| "grad_norm": 21.12237548828125, | |
| "learning_rate": 4.7754722824403844e-05, | |
| "loss": 1.1894, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.19213174748398903, | |
| "grad_norm": 8.743210792541504, | |
| "learning_rate": 4.759987612263859e-05, | |
| "loss": 1.1648, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.2012808783165599, | |
| "grad_norm": 11.74830150604248, | |
| "learning_rate": 4.744502942087334e-05, | |
| "loss": 1.1461, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.21043000914913082, | |
| "grad_norm": 15.216257095336914, | |
| "learning_rate": 4.729018271910808e-05, | |
| "loss": 1.1189, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.21957913998170173, | |
| "grad_norm": 17.434612274169922, | |
| "learning_rate": 4.713533601734284e-05, | |
| "loss": 1.126, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.22872827081427263, | |
| "grad_norm": 11.15873908996582, | |
| "learning_rate": 4.698048931557758e-05, | |
| "loss": 1.0959, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.23787740164684354, | |
| "grad_norm": 13.587617874145508, | |
| "learning_rate": 4.682564261381233e-05, | |
| "loss": 1.0969, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.24702653247941445, | |
| "grad_norm": 11.1122407913208, | |
| "learning_rate": 4.6670795912047076e-05, | |
| "loss": 1.0839, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.2561756633119854, | |
| "grad_norm": 16.767852783203125, | |
| "learning_rate": 4.6515949210281825e-05, | |
| "loss": 1.053, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.2653247941445563, | |
| "grad_norm": 14.137765884399414, | |
| "learning_rate": 4.6361102508516566e-05, | |
| "loss": 1.0309, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.2744739249771272, | |
| "grad_norm": 13.212748527526855, | |
| "learning_rate": 4.620625580675132e-05, | |
| "loss": 0.9809, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.2836230558096981, | |
| "grad_norm": 12.678833961486816, | |
| "learning_rate": 4.605140910498606e-05, | |
| "loss": 0.9719, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.292772186642269, | |
| "grad_norm": 11.58700180053711, | |
| "learning_rate": 4.589656240322082e-05, | |
| "loss": 0.9258, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.3019213174748399, | |
| "grad_norm": 10.919293403625488, | |
| "learning_rate": 4.574171570145556e-05, | |
| "loss": 0.8796, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.3110704483074108, | |
| "grad_norm": 10.442853927612305, | |
| "learning_rate": 4.558686899969031e-05, | |
| "loss": 0.8912, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.3202195791399817, | |
| "grad_norm": 11.729557991027832, | |
| "learning_rate": 4.543202229792506e-05, | |
| "loss": 0.8275, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.32936870997255263, | |
| "grad_norm": 9.217303276062012, | |
| "learning_rate": 4.5277175596159805e-05, | |
| "loss": 0.7889, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.33851784080512354, | |
| "grad_norm": 13.999395370483398, | |
| "learning_rate": 4.512232889439455e-05, | |
| "loss": 0.736, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.34766697163769444, | |
| "grad_norm": 18.503355026245117, | |
| "learning_rate": 4.49674821926293e-05, | |
| "loss": 0.7104, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.35681610247026535, | |
| "grad_norm": 11.04101848602295, | |
| "learning_rate": 4.4812635490864044e-05, | |
| "loss": 0.7163, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.36596523330283626, | |
| "grad_norm": 9.643781661987305, | |
| "learning_rate": 4.465778878909879e-05, | |
| "loss": 0.6944, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.37511436413540716, | |
| "grad_norm": 8.121737480163574, | |
| "learning_rate": 4.450294208733354e-05, | |
| "loss": 0.6771, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.38426349496797807, | |
| "grad_norm": 12.704200744628906, | |
| "learning_rate": 4.434809538556829e-05, | |
| "loss": 0.6185, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.3934126258005489, | |
| "grad_norm": 8.623883247375488, | |
| "learning_rate": 4.419324868380304e-05, | |
| "loss": 0.6471, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.4025617566331198, | |
| "grad_norm": 9.99401569366455, | |
| "learning_rate": 4.4038401982037786e-05, | |
| "loss": 0.6112, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.41171088746569073, | |
| "grad_norm": 9.302281379699707, | |
| "learning_rate": 4.3883555280272534e-05, | |
| "loss": 0.6054, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.42086001829826164, | |
| "grad_norm": 12.407485961914062, | |
| "learning_rate": 4.372870857850728e-05, | |
| "loss": 0.6045, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.43000914913083255, | |
| "grad_norm": 11.521641731262207, | |
| "learning_rate": 4.357386187674203e-05, | |
| "loss": 0.5927, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.43915827996340345, | |
| "grad_norm": 18.213590621948242, | |
| "learning_rate": 4.341901517497677e-05, | |
| "loss": 0.5654, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.44830741079597436, | |
| "grad_norm": 9.964733123779297, | |
| "learning_rate": 4.326416847321153e-05, | |
| "loss": 0.5561, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.45745654162854527, | |
| "grad_norm": 10.646913528442383, | |
| "learning_rate": 4.310932177144627e-05, | |
| "loss": 0.5174, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.45745654162854527, | |
| "eval_loss": 0.5902902483940125, | |
| "eval_runtime": 436.5208, | |
| "eval_samples_per_second": 1.766, | |
| "eval_steps_per_second": 0.112, | |
| "eval_wer": 0.9019247509430313, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.46660567246111617, | |
| "grad_norm": 7.100905895233154, | |
| "learning_rate": 4.295447506968102e-05, | |
| "loss": 0.5449, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.4757548032936871, | |
| "grad_norm": 14.554773330688477, | |
| "learning_rate": 4.2799628367915767e-05, | |
| "loss": 0.5277, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.484903934126258, | |
| "grad_norm": 12.082781791687012, | |
| "learning_rate": 4.2644781666150515e-05, | |
| "loss": 0.5299, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.4940530649588289, | |
| "grad_norm": 8.607912063598633, | |
| "learning_rate": 4.2489934964385257e-05, | |
| "loss": 0.5395, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.5032021957913998, | |
| "grad_norm": 9.982528686523438, | |
| "learning_rate": 4.233508826262001e-05, | |
| "loss": 0.5197, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.5123513266239708, | |
| "grad_norm": 12.866645812988281, | |
| "learning_rate": 4.2180241560854753e-05, | |
| "loss": 0.4857, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.5215004574565416, | |
| "grad_norm": 9.12654972076416, | |
| "learning_rate": 4.20253948590895e-05, | |
| "loss": 0.4852, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.5306495882891126, | |
| "grad_norm": 7.0818705558776855, | |
| "learning_rate": 4.187054815732425e-05, | |
| "loss": 0.5013, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.5397987191216834, | |
| "grad_norm": 9.520069122314453, | |
| "learning_rate": 4.1715701455559e-05, | |
| "loss": 0.4656, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.5489478499542544, | |
| "grad_norm": 8.271717071533203, | |
| "learning_rate": 4.156085475379375e-05, | |
| "loss": 0.4866, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.5580969807868252, | |
| "grad_norm": 9.679398536682129, | |
| "learning_rate": 4.1406008052028496e-05, | |
| "loss": 0.5045, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.5672461116193962, | |
| "grad_norm": 9.209792137145996, | |
| "learning_rate": 4.125116135026324e-05, | |
| "loss": 0.435, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.576395242451967, | |
| "grad_norm": 7.2256669998168945, | |
| "learning_rate": 4.109631464849799e-05, | |
| "loss": 0.4575, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.585544373284538, | |
| "grad_norm": 7.8047990798950195, | |
| "learning_rate": 4.0941467946732734e-05, | |
| "loss": 0.4598, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.5946935041171089, | |
| "grad_norm": 7.035597801208496, | |
| "learning_rate": 4.078662124496748e-05, | |
| "loss": 0.4714, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.6038426349496798, | |
| "grad_norm": 7.996973514556885, | |
| "learning_rate": 4.063177454320223e-05, | |
| "loss": 0.4596, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.6129917657822507, | |
| "grad_norm": 6.872828960418701, | |
| "learning_rate": 4.047692784143698e-05, | |
| "loss": 0.4106, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.6221408966148216, | |
| "grad_norm": 6.923854827880859, | |
| "learning_rate": 4.032208113967173e-05, | |
| "loss": 0.42, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.6312900274473925, | |
| "grad_norm": 12.70057487487793, | |
| "learning_rate": 4.0167234437906476e-05, | |
| "loss": 0.4229, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.6404391582799634, | |
| "grad_norm": 6.876515865325928, | |
| "learning_rate": 4.0012387736141224e-05, | |
| "loss": 0.4114, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.6495882891125343, | |
| "grad_norm": 7.49954891204834, | |
| "learning_rate": 3.9857541034375966e-05, | |
| "loss": 0.4163, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.6587374199451053, | |
| "grad_norm": 6.375706672668457, | |
| "learning_rate": 3.970269433261072e-05, | |
| "loss": 0.4067, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.6678865507776761, | |
| "grad_norm": 5.803896427154541, | |
| "learning_rate": 3.954784763084546e-05, | |
| "loss": 0.4269, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.6770356816102471, | |
| "grad_norm": 9.036760330200195, | |
| "learning_rate": 3.939300092908022e-05, | |
| "loss": 0.4117, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.6861848124428179, | |
| "grad_norm": 6.481241226196289, | |
| "learning_rate": 3.923815422731496e-05, | |
| "loss": 0.4561, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.6953339432753889, | |
| "grad_norm": 7.707711219787598, | |
| "learning_rate": 3.908330752554971e-05, | |
| "loss": 0.4075, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.7044830741079597, | |
| "grad_norm": 6.894267559051514, | |
| "learning_rate": 3.892846082378446e-05, | |
| "loss": 0.4036, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.7136322049405307, | |
| "grad_norm": 6.747013568878174, | |
| "learning_rate": 3.8773614122019205e-05, | |
| "loss": 0.4139, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.7227813357731016, | |
| "grad_norm": 8.749561309814453, | |
| "learning_rate": 3.861876742025395e-05, | |
| "loss": 0.391, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.7319304666056725, | |
| "grad_norm": 6.197606086730957, | |
| "learning_rate": 3.84639207184887e-05, | |
| "loss": 0.4115, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.7410795974382434, | |
| "grad_norm": 6.012449264526367, | |
| "learning_rate": 3.8309074016723444e-05, | |
| "loss": 0.43, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.7502287282708143, | |
| "grad_norm": 9.235795021057129, | |
| "learning_rate": 3.815422731495819e-05, | |
| "loss": 0.4013, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.7593778591033852, | |
| "grad_norm": 6.508467197418213, | |
| "learning_rate": 3.799938061319294e-05, | |
| "loss": 0.4084, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.7685269899359561, | |
| "grad_norm": 12.164517402648926, | |
| "learning_rate": 3.784453391142769e-05, | |
| "loss": 0.422, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.777676120768527, | |
| "grad_norm": 6.47005033493042, | |
| "learning_rate": 3.768968720966244e-05, | |
| "loss": 0.3806, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.7868252516010978, | |
| "grad_norm": 6.4245476722717285, | |
| "learning_rate": 3.7534840507897186e-05, | |
| "loss": 0.3772, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.7959743824336688, | |
| "grad_norm": 6.941617965698242, | |
| "learning_rate": 3.737999380613193e-05, | |
| "loss": 0.3621, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.8051235132662397, | |
| "grad_norm": 6.679232120513916, | |
| "learning_rate": 3.722514710436668e-05, | |
| "loss": 0.3699, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.8142726440988106, | |
| "grad_norm": 7.287721157073975, | |
| "learning_rate": 3.7070300402601424e-05, | |
| "loss": 0.3728, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.8234217749313815, | |
| "grad_norm": 7.297004699707031, | |
| "learning_rate": 3.691545370083617e-05, | |
| "loss": 0.3823, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.8325709057639524, | |
| "grad_norm": 5.730973720550537, | |
| "learning_rate": 3.676060699907092e-05, | |
| "loss": 0.3716, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.8417200365965233, | |
| "grad_norm": 8.157340049743652, | |
| "learning_rate": 3.660576029730567e-05, | |
| "loss": 0.3731, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.8508691674290942, | |
| "grad_norm": 8.863473892211914, | |
| "learning_rate": 3.645091359554042e-05, | |
| "loss": 0.3445, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.8600182982616651, | |
| "grad_norm": 5.911675453186035, | |
| "learning_rate": 3.6296066893775166e-05, | |
| "loss": 0.3671, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.869167429094236, | |
| "grad_norm": 6.246954441070557, | |
| "learning_rate": 3.6141220192009915e-05, | |
| "loss": 0.3876, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.8783165599268069, | |
| "grad_norm": 4.594511032104492, | |
| "learning_rate": 3.5986373490244656e-05, | |
| "loss": 0.3637, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.8874656907593779, | |
| "grad_norm": 7.323066234588623, | |
| "learning_rate": 3.583152678847941e-05, | |
| "loss": 0.3624, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.8966148215919487, | |
| "grad_norm": 6.408933639526367, | |
| "learning_rate": 3.567668008671415e-05, | |
| "loss": 0.3496, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.9057639524245197, | |
| "grad_norm": 5.430429935455322, | |
| "learning_rate": 3.552183338494891e-05, | |
| "loss": 0.368, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.9149130832570905, | |
| "grad_norm": 7.088529109954834, | |
| "learning_rate": 3.536698668318365e-05, | |
| "loss": 0.3655, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.9149130832570905, | |
| "eval_loss": 0.3907645046710968, | |
| "eval_runtime": 424.8249, | |
| "eval_samples_per_second": 1.815, | |
| "eval_steps_per_second": 0.115, | |
| "eval_wer": 0.8130380114130961, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.9240622140896615, | |
| "grad_norm": 5.1332292556762695, | |
| "learning_rate": 3.52121399814184e-05, | |
| "loss": 0.3153, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.9332113449222323, | |
| "grad_norm": 5.477539539337158, | |
| "learning_rate": 3.505729327965315e-05, | |
| "loss": 0.3383, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.9423604757548033, | |
| "grad_norm": 6.7095866203308105, | |
| "learning_rate": 3.4902446577887895e-05, | |
| "loss": 0.3351, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.9515096065873742, | |
| "grad_norm": 4.704165935516357, | |
| "learning_rate": 3.474759987612264e-05, | |
| "loss": 0.3144, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.9606587374199451, | |
| "grad_norm": 9.83104419708252, | |
| "learning_rate": 3.459275317435739e-05, | |
| "loss": 0.3667, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.969807868252516, | |
| "grad_norm": 6.172043323516846, | |
| "learning_rate": 3.4437906472592134e-05, | |
| "loss": 0.3298, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.9789569990850869, | |
| "grad_norm": 6.027336597442627, | |
| "learning_rate": 3.428305977082688e-05, | |
| "loss": 0.3269, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.9881061299176578, | |
| "grad_norm": 6.435912132263184, | |
| "learning_rate": 3.412821306906163e-05, | |
| "loss": 0.3203, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.9972552607502287, | |
| "grad_norm": 7.0265913009643555, | |
| "learning_rate": 3.397336636729638e-05, | |
| "loss": 0.3413, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 1.0064043915827996, | |
| "grad_norm": 6.904513835906982, | |
| "learning_rate": 3.381851966553112e-05, | |
| "loss": 0.3164, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 1.0155535224153704, | |
| "grad_norm": 5.236996173858643, | |
| "learning_rate": 3.3663672963765876e-05, | |
| "loss": 0.2675, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 1.0247026532479415, | |
| "grad_norm": 6.988259792327881, | |
| "learning_rate": 3.350882626200062e-05, | |
| "loss": 0.3134, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 1.0338517840805124, | |
| "grad_norm": 8.87595272064209, | |
| "learning_rate": 3.335397956023537e-05, | |
| "loss": 0.3065, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 1.0430009149130832, | |
| "grad_norm": 7.2589287757873535, | |
| "learning_rate": 3.3199132858470114e-05, | |
| "loss": 0.2798, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 1.052150045745654, | |
| "grad_norm": 7.233737945556641, | |
| "learning_rate": 3.304428615670486e-05, | |
| "loss": 0.2954, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 1.0612991765782251, | |
| "grad_norm": 4.9386887550354, | |
| "learning_rate": 3.288943945493961e-05, | |
| "loss": 0.2959, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 1.070448307410796, | |
| "grad_norm": 6.335395812988281, | |
| "learning_rate": 3.273459275317436e-05, | |
| "loss": 0.295, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 1.0795974382433668, | |
| "grad_norm": 6.33104944229126, | |
| "learning_rate": 3.257974605140911e-05, | |
| "loss": 0.2997, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 1.0887465690759377, | |
| "grad_norm": 5.694860458374023, | |
| "learning_rate": 3.2424899349643856e-05, | |
| "loss": 0.262, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 1.0978956999085088, | |
| "grad_norm": 5.777647018432617, | |
| "learning_rate": 3.2270052647878605e-05, | |
| "loss": 0.2822, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 1.1070448307410796, | |
| "grad_norm": 8.250167846679688, | |
| "learning_rate": 3.2115205946113346e-05, | |
| "loss": 0.2778, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 1.1161939615736505, | |
| "grad_norm": 4.275432586669922, | |
| "learning_rate": 3.19603592443481e-05, | |
| "loss": 0.2793, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 1.1253430924062213, | |
| "grad_norm": 4.831576824188232, | |
| "learning_rate": 3.180551254258284e-05, | |
| "loss": 0.2815, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 1.1344922232387924, | |
| "grad_norm": 5.6868720054626465, | |
| "learning_rate": 3.165066584081759e-05, | |
| "loss": 0.3085, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 1.1436413540713632, | |
| "grad_norm": 5.878891944885254, | |
| "learning_rate": 3.149581913905234e-05, | |
| "loss": 0.2543, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 1.152790484903934, | |
| "grad_norm": 5.85615348815918, | |
| "learning_rate": 3.134097243728709e-05, | |
| "loss": 0.2538, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 1.161939615736505, | |
| "grad_norm": 4.6179118156433105, | |
| "learning_rate": 3.118612573552184e-05, | |
| "loss": 0.2565, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 1.171088746569076, | |
| "grad_norm": 5.257189750671387, | |
| "learning_rate": 3.1031279033756585e-05, | |
| "loss": 0.2681, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 1.1802378774016469, | |
| "grad_norm": 4.76942253112793, | |
| "learning_rate": 3.087643233199133e-05, | |
| "loss": 0.2826, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 1.1893870082342177, | |
| "grad_norm": 5.82953405380249, | |
| "learning_rate": 3.072158563022608e-05, | |
| "loss": 0.2826, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 1.1985361390667886, | |
| "grad_norm": 5.2305731773376465, | |
| "learning_rate": 3.0566738928460824e-05, | |
| "loss": 0.2598, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 1.2076852698993596, | |
| "grad_norm": 5.51474666595459, | |
| "learning_rate": 3.0411892226695572e-05, | |
| "loss": 0.2685, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 1.2168344007319305, | |
| "grad_norm": 7.23142147064209, | |
| "learning_rate": 3.025704552493032e-05, | |
| "loss": 0.285, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 1.2259835315645013, | |
| "grad_norm": 5.186690807342529, | |
| "learning_rate": 3.010219882316507e-05, | |
| "loss": 0.2872, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 1.2351326623970722, | |
| "grad_norm": 5.723147392272949, | |
| "learning_rate": 2.9947352121399814e-05, | |
| "loss": 0.2631, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 1.2442817932296433, | |
| "grad_norm": 4.612165451049805, | |
| "learning_rate": 2.9792505419634566e-05, | |
| "loss": 0.2966, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 1.2534309240622141, | |
| "grad_norm": 5.467476844787598, | |
| "learning_rate": 2.963765871786931e-05, | |
| "loss": 0.2308, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 1.262580054894785, | |
| "grad_norm": 4.7134785652160645, | |
| "learning_rate": 2.9482812016104063e-05, | |
| "loss": 0.2781, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 1.2717291857273558, | |
| "grad_norm": 4.138732433319092, | |
| "learning_rate": 2.9327965314338808e-05, | |
| "loss": 0.2452, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 1.2808783165599267, | |
| "grad_norm": 4.39865255355835, | |
| "learning_rate": 2.9173118612573553e-05, | |
| "loss": 0.2759, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 1.2900274473924978, | |
| "grad_norm": 6.269981384277344, | |
| "learning_rate": 2.9018271910808305e-05, | |
| "loss": 0.2802, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 1.2991765782250686, | |
| "grad_norm": 5.472837924957275, | |
| "learning_rate": 2.886342520904305e-05, | |
| "loss": 0.282, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 1.3083257090576395, | |
| "grad_norm": 5.290619850158691, | |
| "learning_rate": 2.8708578507277795e-05, | |
| "loss": 0.2443, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 1.3174748398902105, | |
| "grad_norm": 4.903107643127441, | |
| "learning_rate": 2.8553731805512546e-05, | |
| "loss": 0.255, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 1.3266239707227814, | |
| "grad_norm": 5.144070625305176, | |
| "learning_rate": 2.839888510374729e-05, | |
| "loss": 0.2375, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 1.3357731015553522, | |
| "grad_norm": 4.945043087005615, | |
| "learning_rate": 2.8244038401982036e-05, | |
| "loss": 0.2381, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 1.344922232387923, | |
| "grad_norm": 5.670736789703369, | |
| "learning_rate": 2.8089191700216788e-05, | |
| "loss": 0.2398, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 1.354071363220494, | |
| "grad_norm": 5.526036739349365, | |
| "learning_rate": 2.7934344998451533e-05, | |
| "loss": 0.2748, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 1.363220494053065, | |
| "grad_norm": 4.805148601531982, | |
| "learning_rate": 2.7779498296686278e-05, | |
| "loss": 0.2412, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 1.3723696248856359, | |
| "grad_norm": 4.122767925262451, | |
| "learning_rate": 2.762465159492103e-05, | |
| "loss": 0.2805, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 1.3723696248856359, | |
| "eval_loss": 0.33601683378219604, | |
| "eval_runtime": 432.4301, | |
| "eval_samples_per_second": 1.783, | |
| "eval_steps_per_second": 0.113, | |
| "eval_wer": 0.7586807234742238, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 1.3815187557182067, | |
| "grad_norm": 4.068643569946289, | |
| "learning_rate": 2.7469804893155775e-05, | |
| "loss": 0.2527, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 1.3906678865507778, | |
| "grad_norm": 5.818108081817627, | |
| "learning_rate": 2.7314958191390527e-05, | |
| "loss": 0.2707, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 1.3998170173833486, | |
| "grad_norm": 6.448596477508545, | |
| "learning_rate": 2.7160111489625272e-05, | |
| "loss": 0.2466, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 1.4089661482159195, | |
| "grad_norm": 6.120127201080322, | |
| "learning_rate": 2.7005264787860017e-05, | |
| "loss": 0.259, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 1.4181152790484903, | |
| "grad_norm": 4.396270751953125, | |
| "learning_rate": 2.685041808609477e-05, | |
| "loss": 0.2505, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 1.4272644098810612, | |
| "grad_norm": 3.8976686000823975, | |
| "learning_rate": 2.6695571384329514e-05, | |
| "loss": 0.2429, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 1.4364135407136323, | |
| "grad_norm": 4.241589069366455, | |
| "learning_rate": 2.6540724682564262e-05, | |
| "loss": 0.2424, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 1.445562671546203, | |
| "grad_norm": 6.113090515136719, | |
| "learning_rate": 2.638587798079901e-05, | |
| "loss": 0.2642, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 1.454711802378774, | |
| "grad_norm": 4.122611999511719, | |
| "learning_rate": 2.623103127903376e-05, | |
| "loss": 0.2259, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 1.463860933211345, | |
| "grad_norm": 4.869472026824951, | |
| "learning_rate": 2.6076184577268504e-05, | |
| "loss": 0.2542, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 1.4730100640439159, | |
| "grad_norm": 4.926369667053223, | |
| "learning_rate": 2.5921337875503256e-05, | |
| "loss": 0.2789, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 1.4821591948764867, | |
| "grad_norm": 5.319028854370117, | |
| "learning_rate": 2.5766491173738e-05, | |
| "loss": 0.265, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 1.4913083257090576, | |
| "grad_norm": 6.620922088623047, | |
| "learning_rate": 2.5611644471972746e-05, | |
| "loss": 0.2442, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 1.5004574565416284, | |
| "grad_norm": 5.458837032318115, | |
| "learning_rate": 2.5456797770207498e-05, | |
| "loss": 0.252, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 1.5096065873741995, | |
| "grad_norm": 5.415153503417969, | |
| "learning_rate": 2.5301951068442243e-05, | |
| "loss": 0.237, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 1.5187557182067704, | |
| "grad_norm": 4.952278137207031, | |
| "learning_rate": 2.5147104366676995e-05, | |
| "loss": 0.2577, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 1.5279048490393414, | |
| "grad_norm": 4.834970951080322, | |
| "learning_rate": 2.499225766491174e-05, | |
| "loss": 0.2452, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 1.5370539798719123, | |
| "grad_norm": 5.410050392150879, | |
| "learning_rate": 2.4837410963146488e-05, | |
| "loss": 0.2258, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 1.5462031107044831, | |
| "grad_norm": 4.104517936706543, | |
| "learning_rate": 2.4682564261381233e-05, | |
| "loss": 0.229, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 1.555352241537054, | |
| "grad_norm": 4.475819110870361, | |
| "learning_rate": 2.452771755961598e-05, | |
| "loss": 0.2589, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 1.5645013723696248, | |
| "grad_norm": 3.8395609855651855, | |
| "learning_rate": 2.437287085785073e-05, | |
| "loss": 0.2269, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 1.5736505032021957, | |
| "grad_norm": 4.9355621337890625, | |
| "learning_rate": 2.4218024156085475e-05, | |
| "loss": 0.2625, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 1.5827996340347665, | |
| "grad_norm": 4.053934097290039, | |
| "learning_rate": 2.4063177454320223e-05, | |
| "loss": 0.2559, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 1.5919487648673376, | |
| "grad_norm": 5.001983642578125, | |
| "learning_rate": 2.3908330752554972e-05, | |
| "loss": 0.23, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 1.6010978956999085, | |
| "grad_norm": 5.705740928649902, | |
| "learning_rate": 2.375348405078972e-05, | |
| "loss": 0.2173, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 1.6102470265324795, | |
| "grad_norm": 4.854909420013428, | |
| "learning_rate": 2.3598637349024465e-05, | |
| "loss": 0.2297, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 1.6193961573650504, | |
| "grad_norm": 3.785277843475342, | |
| "learning_rate": 2.3443790647259214e-05, | |
| "loss": 0.2065, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 1.6285452881976212, | |
| "grad_norm": 5.307765960693359, | |
| "learning_rate": 2.3288943945493962e-05, | |
| "loss": 0.2246, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 1.637694419030192, | |
| "grad_norm": 5.032717704772949, | |
| "learning_rate": 2.3134097243728707e-05, | |
| "loss": 0.2168, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 1.646843549862763, | |
| "grad_norm": 4.665537357330322, | |
| "learning_rate": 2.2979250541963456e-05, | |
| "loss": 0.2409, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 1.6559926806953338, | |
| "grad_norm": 4.126980304718018, | |
| "learning_rate": 2.2824403840198204e-05, | |
| "loss": 0.2397, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 1.6651418115279049, | |
| "grad_norm": 5.973440170288086, | |
| "learning_rate": 2.2669557138432952e-05, | |
| "loss": 0.2654, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 1.6742909423604757, | |
| "grad_norm": 4.972531795501709, | |
| "learning_rate": 2.25147104366677e-05, | |
| "loss": 0.2636, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 1.6834400731930468, | |
| "grad_norm": 6.962503910064697, | |
| "learning_rate": 2.235986373490245e-05, | |
| "loss": 0.2629, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 1.6925892040256176, | |
| "grad_norm": 4.002923488616943, | |
| "learning_rate": 2.2205017033137198e-05, | |
| "loss": 0.2333, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 1.7017383348581885, | |
| "grad_norm": 5.305150985717773, | |
| "learning_rate": 2.2050170331371943e-05, | |
| "loss": 0.2535, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 1.7108874656907593, | |
| "grad_norm": 4.577486038208008, | |
| "learning_rate": 2.189532362960669e-05, | |
| "loss": 0.2307, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 1.7200365965233302, | |
| "grad_norm": 4.220026016235352, | |
| "learning_rate": 2.174047692784144e-05, | |
| "loss": 0.2461, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 1.729185727355901, | |
| "grad_norm": 5.4357428550720215, | |
| "learning_rate": 2.1585630226076188e-05, | |
| "loss": 0.2297, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 1.738334858188472, | |
| "grad_norm": 5.218511581420898, | |
| "learning_rate": 2.1430783524310933e-05, | |
| "loss": 0.2419, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 1.747483989021043, | |
| "grad_norm": 6.166689395904541, | |
| "learning_rate": 2.127593682254568e-05, | |
| "loss": 0.2471, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 1.756633119853614, | |
| "grad_norm": 5.226531982421875, | |
| "learning_rate": 2.112109012078043e-05, | |
| "loss": 0.238, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 1.7657822506861849, | |
| "grad_norm": 6.10182523727417, | |
| "learning_rate": 2.0966243419015175e-05, | |
| "loss": 0.2654, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 1.7749313815187557, | |
| "grad_norm": 4.4128737449646, | |
| "learning_rate": 2.0811396717249923e-05, | |
| "loss": 0.23, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 1.7840805123513266, | |
| "grad_norm": 4.541961193084717, | |
| "learning_rate": 2.065655001548467e-05, | |
| "loss": 0.2067, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 1.7932296431838974, | |
| "grad_norm": 8.150908470153809, | |
| "learning_rate": 2.050170331371942e-05, | |
| "loss": 0.224, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 1.8023787740164683, | |
| "grad_norm": 4.411103248596191, | |
| "learning_rate": 2.0346856611954165e-05, | |
| "loss": 0.2244, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 1.8115279048490394, | |
| "grad_norm": 4.345833778381348, | |
| "learning_rate": 2.0192009910188913e-05, | |
| "loss": 0.225, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 1.8206770356816102, | |
| "grad_norm": 4.550020694732666, | |
| "learning_rate": 2.0037163208423662e-05, | |
| "loss": 0.2406, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 1.8298261665141813, | |
| "grad_norm": 3.8560264110565186, | |
| "learning_rate": 1.988231650665841e-05, | |
| "loss": 0.2461, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 1.8298261665141813, | |
| "eval_loss": 0.29912057518959045, | |
| "eval_runtime": 422.9004, | |
| "eval_samples_per_second": 1.823, | |
| "eval_steps_per_second": 0.116, | |
| "eval_wer": 0.7281651997291808, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 1.8389752973467521, | |
| "grad_norm": 5.020371437072754, | |
| "learning_rate": 1.9727469804893155e-05, | |
| "loss": 0.2257, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 1.848124428179323, | |
| "grad_norm": 6.07639741897583, | |
| "learning_rate": 1.9572623103127904e-05, | |
| "loss": 0.2146, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 1.8572735590118938, | |
| "grad_norm": 5.103982925415039, | |
| "learning_rate": 1.9417776401362652e-05, | |
| "loss": 0.2248, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 1.8664226898444647, | |
| "grad_norm": 5.3223042488098145, | |
| "learning_rate": 1.9262929699597397e-05, | |
| "loss": 0.2162, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 1.8755718206770355, | |
| "grad_norm": 4.5631103515625, | |
| "learning_rate": 1.9108082997832146e-05, | |
| "loss": 0.2279, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 1.8847209515096066, | |
| "grad_norm": 4.72071647644043, | |
| "learning_rate": 1.8953236296066894e-05, | |
| "loss": 0.251, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 1.8938700823421775, | |
| "grad_norm": 4.969239234924316, | |
| "learning_rate": 1.8798389594301642e-05, | |
| "loss": 0.2172, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 1.9030192131747485, | |
| "grad_norm": 4.407639026641846, | |
| "learning_rate": 1.864354289253639e-05, | |
| "loss": 0.2098, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 1.9121683440073194, | |
| "grad_norm": 3.3802950382232666, | |
| "learning_rate": 1.8488696190771136e-05, | |
| "loss": 0.2192, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 1.9213174748398902, | |
| "grad_norm": 4.947459697723389, | |
| "learning_rate": 1.8333849489005884e-05, | |
| "loss": 0.2278, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 1.930466605672461, | |
| "grad_norm": 4.750110626220703, | |
| "learning_rate": 1.8179002787240633e-05, | |
| "loss": 0.2185, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 1.939615736505032, | |
| "grad_norm": 4.515120506286621, | |
| "learning_rate": 1.802415608547538e-05, | |
| "loss": 0.2045, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 1.9487648673376028, | |
| "grad_norm": 4.280106067657471, | |
| "learning_rate": 1.786930938371013e-05, | |
| "loss": 0.2051, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 1.9579139981701739, | |
| "grad_norm": 4.002866268157959, | |
| "learning_rate": 1.7714462681944878e-05, | |
| "loss": 0.2301, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 1.9670631290027447, | |
| "grad_norm": 4.178459644317627, | |
| "learning_rate": 1.7559615980179623e-05, | |
| "loss": 0.2202, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 1.9762122598353158, | |
| "grad_norm": 6.406257629394531, | |
| "learning_rate": 1.740476927841437e-05, | |
| "loss": 0.225, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 1.9853613906678866, | |
| "grad_norm": 4.606039524078369, | |
| "learning_rate": 1.724992257664912e-05, | |
| "loss": 0.2446, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 1.9945105215004575, | |
| "grad_norm": 4.238482475280762, | |
| "learning_rate": 1.7095075874883865e-05, | |
| "loss": 0.2524, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 2.0036596523330283, | |
| "grad_norm": 3.64787220954895, | |
| "learning_rate": 1.6940229173118613e-05, | |
| "loss": 0.218, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 2.012808783165599, | |
| "grad_norm": 3.7717037200927734, | |
| "learning_rate": 1.6785382471353362e-05, | |
| "loss": 0.1669, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 2.02195791399817, | |
| "grad_norm": 2.716965675354004, | |
| "learning_rate": 1.663053576958811e-05, | |
| "loss": 0.1806, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 2.031107044830741, | |
| "grad_norm": 4.059733867645264, | |
| "learning_rate": 1.6475689067822855e-05, | |
| "loss": 0.186, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 2.040256175663312, | |
| "grad_norm": 4.125363349914551, | |
| "learning_rate": 1.6320842366057604e-05, | |
| "loss": 0.1872, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 2.049405306495883, | |
| "grad_norm": 3.4910032749176025, | |
| "learning_rate": 1.6165995664292352e-05, | |
| "loss": 0.1519, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 2.058554437328454, | |
| "grad_norm": 2.8993113040924072, | |
| "learning_rate": 1.6011148962527097e-05, | |
| "loss": 0.1744, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 2.0677035681610247, | |
| "grad_norm": 4.730359077453613, | |
| "learning_rate": 1.5856302260761845e-05, | |
| "loss": 0.1627, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 2.0768526989935956, | |
| "grad_norm": 5.577477931976318, | |
| "learning_rate": 1.5701455558996594e-05, | |
| "loss": 0.1753, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 2.0860018298261664, | |
| "grad_norm": 4.823721885681152, | |
| "learning_rate": 1.5546608857231342e-05, | |
| "loss": 0.1821, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 2.0951509606587373, | |
| "grad_norm": 3.287593364715576, | |
| "learning_rate": 1.5391762155466087e-05, | |
| "loss": 0.1573, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 2.104300091491308, | |
| "grad_norm": 5.850045204162598, | |
| "learning_rate": 1.5236915453700837e-05, | |
| "loss": 0.1786, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 2.1134492223238794, | |
| "grad_norm": 4.332837104797363, | |
| "learning_rate": 1.5082068751935586e-05, | |
| "loss": 0.1884, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 2.1225983531564503, | |
| "grad_norm": 3.266853094100952, | |
| "learning_rate": 1.4927222050170331e-05, | |
| "loss": 0.1589, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 2.131747483989021, | |
| "grad_norm": 3.742208242416382, | |
| "learning_rate": 1.477237534840508e-05, | |
| "loss": 0.1691, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 2.140896614821592, | |
| "grad_norm": 4.098796844482422, | |
| "learning_rate": 1.4617528646639828e-05, | |
| "loss": 0.1622, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 2.150045745654163, | |
| "grad_norm": 5.091181755065918, | |
| "learning_rate": 1.4462681944874576e-05, | |
| "loss": 0.185, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 2.1591948764867337, | |
| "grad_norm": 3.2334043979644775, | |
| "learning_rate": 1.4307835243109321e-05, | |
| "loss": 0.1573, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 2.1683440073193045, | |
| "grad_norm": 3.941044569015503, | |
| "learning_rate": 1.415298854134407e-05, | |
| "loss": 0.17, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 2.1774931381518754, | |
| "grad_norm": 4.680139541625977, | |
| "learning_rate": 1.3998141839578818e-05, | |
| "loss": 0.1652, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 2.1866422689844462, | |
| "grad_norm": 3.671124219894409, | |
| "learning_rate": 1.3843295137813565e-05, | |
| "loss": 0.1637, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 2.1957913998170175, | |
| "grad_norm": 3.4199767112731934, | |
| "learning_rate": 1.3688448436048313e-05, | |
| "loss": 0.1822, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 2.2049405306495884, | |
| "grad_norm": 4.193777084350586, | |
| "learning_rate": 1.353360173428306e-05, | |
| "loss": 0.1724, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 2.2140896614821592, | |
| "grad_norm": 3.5047738552093506, | |
| "learning_rate": 1.3378755032517808e-05, | |
| "loss": 0.1661, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 2.22323879231473, | |
| "grad_norm": 4.056273460388184, | |
| "learning_rate": 1.3223908330752555e-05, | |
| "loss": 0.1476, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 2.232387923147301, | |
| "grad_norm": 5.089756488800049, | |
| "learning_rate": 1.3069061628987303e-05, | |
| "loss": 0.1785, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 2.241537053979872, | |
| "grad_norm": 3.5870766639709473, | |
| "learning_rate": 1.2914214927222052e-05, | |
| "loss": 0.1835, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 2.2506861848124426, | |
| "grad_norm": 3.9031713008880615, | |
| "learning_rate": 1.2759368225456797e-05, | |
| "loss": 0.1725, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 2.259835315645014, | |
| "grad_norm": 4.2854437828063965, | |
| "learning_rate": 1.2604521523691545e-05, | |
| "loss": 0.1774, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 2.268984446477585, | |
| "grad_norm": 4.6277756690979, | |
| "learning_rate": 1.2449674821926294e-05, | |
| "loss": 0.1583, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 2.2781335773101556, | |
| "grad_norm": 5.180362224578857, | |
| "learning_rate": 1.229482812016104e-05, | |
| "loss": 0.1636, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 2.2872827081427265, | |
| "grad_norm": 2.9935238361358643, | |
| "learning_rate": 1.2139981418395789e-05, | |
| "loss": 0.1486, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 2.2872827081427265, | |
| "eval_loss": 0.2814071476459503, | |
| "eval_runtime": 424.1932, | |
| "eval_samples_per_second": 1.818, | |
| "eval_steps_per_second": 0.116, | |
| "eval_wer": 0.7055808105232615, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 2.2964318389752973, | |
| "grad_norm": 3.3283779621124268, | |
| "learning_rate": 1.1985134716630536e-05, | |
| "loss": 0.158, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 2.305580969807868, | |
| "grad_norm": 4.186689376831055, | |
| "learning_rate": 1.1830288014865284e-05, | |
| "loss": 0.1598, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 2.314730100640439, | |
| "grad_norm": 3.5572612285614014, | |
| "learning_rate": 1.167544131310003e-05, | |
| "loss": 0.1492, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 2.32387923147301, | |
| "grad_norm": 2.8076884746551514, | |
| "learning_rate": 1.1520594611334779e-05, | |
| "loss": 0.1637, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 2.3330283623055807, | |
| "grad_norm": 4.602914810180664, | |
| "learning_rate": 1.1365747909569528e-05, | |
| "loss": 0.1526, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 2.342177493138152, | |
| "grad_norm": 2.5850772857666016, | |
| "learning_rate": 1.1210901207804274e-05, | |
| "loss": 0.159, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 2.351326623970723, | |
| "grad_norm": 5.045381546020508, | |
| "learning_rate": 1.1056054506039023e-05, | |
| "loss": 0.1503, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 2.3604757548032937, | |
| "grad_norm": 4.628170967102051, | |
| "learning_rate": 1.090120780427377e-05, | |
| "loss": 0.159, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 2.3696248856358646, | |
| "grad_norm": 3.4683902263641357, | |
| "learning_rate": 1.0746361102508518e-05, | |
| "loss": 0.1613, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 2.3787740164684354, | |
| "grad_norm": 4.1546525955200195, | |
| "learning_rate": 1.0591514400743265e-05, | |
| "loss": 0.1482, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 2.3879231473010063, | |
| "grad_norm": 5.595340251922607, | |
| "learning_rate": 1.0436667698978013e-05, | |
| "loss": 0.1654, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 2.397072278133577, | |
| "grad_norm": 4.809768199920654, | |
| "learning_rate": 1.028182099721276e-05, | |
| "loss": 0.1457, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 2.4062214089661484, | |
| "grad_norm": 3.541982889175415, | |
| "learning_rate": 1.0126974295447506e-05, | |
| "loss": 0.163, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 2.4153705397987193, | |
| "grad_norm": 5.883151054382324, | |
| "learning_rate": 9.972127593682255e-06, | |
| "loss": 0.1761, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 2.42451967063129, | |
| "grad_norm": 4.718671798706055, | |
| "learning_rate": 9.817280891917002e-06, | |
| "loss": 0.1562, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 2.433668801463861, | |
| "grad_norm": 3.135131597518921, | |
| "learning_rate": 9.66243419015175e-06, | |
| "loss": 0.1669, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 2.442817932296432, | |
| "grad_norm": 5.202821254730225, | |
| "learning_rate": 9.507587488386498e-06, | |
| "loss": 0.1748, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 2.4519670631290027, | |
| "grad_norm": 5.344453811645508, | |
| "learning_rate": 9.352740786621247e-06, | |
| "loss": 0.1641, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 2.4611161939615736, | |
| "grad_norm": 3.3761284351348877, | |
| "learning_rate": 9.197894084855993e-06, | |
| "loss": 0.1574, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 2.4702653247941444, | |
| "grad_norm": 5.866576671600342, | |
| "learning_rate": 9.04304738309074e-06, | |
| "loss": 0.1621, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 2.4794144556267153, | |
| "grad_norm": 4.291085720062256, | |
| "learning_rate": 8.888200681325489e-06, | |
| "loss": 0.1614, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 2.4885635864592865, | |
| "grad_norm": 2.837286949157715, | |
| "learning_rate": 8.733353979560235e-06, | |
| "loss": 0.1654, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 2.4977127172918574, | |
| "grad_norm": 3.848227024078369, | |
| "learning_rate": 8.578507277794984e-06, | |
| "loss": 0.1578, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 2.5068618481244282, | |
| "grad_norm": 3.820240020751953, | |
| "learning_rate": 8.42366057602973e-06, | |
| "loss": 0.1627, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 2.516010978956999, | |
| "grad_norm": 3.1845788955688477, | |
| "learning_rate": 8.268813874264479e-06, | |
| "loss": 0.1718, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 2.52516010978957, | |
| "grad_norm": 4.4272236824035645, | |
| "learning_rate": 8.113967172499226e-06, | |
| "loss": 0.1624, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 2.534309240622141, | |
| "grad_norm": 3.211336374282837, | |
| "learning_rate": 7.959120470733972e-06, | |
| "loss": 0.1571, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 2.5434583714547117, | |
| "grad_norm": 3.920867443084717, | |
| "learning_rate": 7.80427376896872e-06, | |
| "loss": 0.1499, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 2.552607502287283, | |
| "grad_norm": 5.123950481414795, | |
| "learning_rate": 7.64942706720347e-06, | |
| "loss": 0.1475, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 2.5617566331198534, | |
| "grad_norm": 3.7110486030578613, | |
| "learning_rate": 7.494580365438217e-06, | |
| "loss": 0.1552, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 2.5709057639524246, | |
| "grad_norm": 4.068341255187988, | |
| "learning_rate": 7.3397336636729635e-06, | |
| "loss": 0.1494, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 2.5800548947849955, | |
| "grad_norm": 4.653831958770752, | |
| "learning_rate": 7.184886961907712e-06, | |
| "loss": 0.161, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 2.5892040256175664, | |
| "grad_norm": 3.5324552059173584, | |
| "learning_rate": 7.0300402601424595e-06, | |
| "loss": 0.16, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 2.598353156450137, | |
| "grad_norm": 5.100922107696533, | |
| "learning_rate": 6.875193558377208e-06, | |
| "loss": 0.1549, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 2.607502287282708, | |
| "grad_norm": 3.772149085998535, | |
| "learning_rate": 6.720346856611955e-06, | |
| "loss": 0.1613, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 2.616651418115279, | |
| "grad_norm": 4.288483619689941, | |
| "learning_rate": 6.565500154846701e-06, | |
| "loss": 0.1605, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 2.6258005489478498, | |
| "grad_norm": 3.9227993488311768, | |
| "learning_rate": 6.41065345308145e-06, | |
| "loss": 0.1538, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 2.634949679780421, | |
| "grad_norm": 3.3688392639160156, | |
| "learning_rate": 6.255806751316197e-06, | |
| "loss": 0.173, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 2.644098810612992, | |
| "grad_norm": 3.6099278926849365, | |
| "learning_rate": 6.100960049550945e-06, | |
| "loss": 0.1739, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 2.6532479414455628, | |
| "grad_norm": 3.802189826965332, | |
| "learning_rate": 5.9461133477856925e-06, | |
| "loss": 0.1506, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 2.6623970722781336, | |
| "grad_norm": 3.382754325866699, | |
| "learning_rate": 5.79126664602044e-06, | |
| "loss": 0.1701, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 2.6715462031107045, | |
| "grad_norm": 3.056814193725586, | |
| "learning_rate": 5.636419944255188e-06, | |
| "loss": 0.1638, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 2.6806953339432753, | |
| "grad_norm": 3.345564842224121, | |
| "learning_rate": 5.481573242489935e-06, | |
| "loss": 0.1318, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 2.689844464775846, | |
| "grad_norm": 3.740990400314331, | |
| "learning_rate": 5.326726540724683e-06, | |
| "loss": 0.1611, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 2.6989935956084175, | |
| "grad_norm": 2.8473143577575684, | |
| "learning_rate": 5.17187983895943e-06, | |
| "loss": 0.1684, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 2.708142726440988, | |
| "grad_norm": 2.8555662631988525, | |
| "learning_rate": 5.017033137194178e-06, | |
| "loss": 0.1883, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 2.717291857273559, | |
| "grad_norm": 4.181397438049316, | |
| "learning_rate": 4.8621864354289254e-06, | |
| "loss": 0.1677, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 2.72644098810613, | |
| "grad_norm": 4.9955949783325195, | |
| "learning_rate": 4.707339733663673e-06, | |
| "loss": 0.1724, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 2.735590118938701, | |
| "grad_norm": 3.999300956726074, | |
| "learning_rate": 4.552493031898421e-06, | |
| "loss": 0.1433, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 2.7447392497712717, | |
| "grad_norm": 3.054906129837036, | |
| "learning_rate": 4.397646330133168e-06, | |
| "loss": 0.1796, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 2.7447392497712717, | |
| "eval_loss": 0.2692735195159912, | |
| "eval_runtime": 414.8696, | |
| "eval_samples_per_second": 1.858, | |
| "eval_steps_per_second": 0.118, | |
| "eval_wer": 0.6916529645033369, | |
| "step": 3000 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 3279, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.18116487028736e+18, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |