| { | |
| "best_metric": 22.283451007404857, | |
| "best_model_checkpoint": "./whisper-large-v2/second2/checkpoint-500", | |
| "epoch": 0.2133902373966391, | |
| "eval_steps": 500, | |
| "global_step": 1000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0010669511869831954, | |
| "grad_norm": 19.418699264526367, | |
| "learning_rate": 3.3333333333333334e-08, | |
| "loss": 2.4187, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.002133902373966391, | |
| "grad_norm": 21.75494956970215, | |
| "learning_rate": 6.666666666666667e-08, | |
| "loss": 2.3362, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.0032008535609495867, | |
| "grad_norm": 21.226938247680664, | |
| "learning_rate": 1e-07, | |
| "loss": 2.4288, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.004267804747932782, | |
| "grad_norm": 27.21125602722168, | |
| "learning_rate": 1.3333333333333334e-07, | |
| "loss": 2.4004, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.005334755934915978, | |
| "grad_norm": 13.559316635131836, | |
| "learning_rate": 1.6666666666666665e-07, | |
| "loss": 2.1607, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.0064017071218991735, | |
| "grad_norm": 13.044219970703125, | |
| "learning_rate": 1.9333333333333332e-07, | |
| "loss": 2.1591, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.007468658308882369, | |
| "grad_norm": 13.89301872253418, | |
| "learning_rate": 2.2666666666666663e-07, | |
| "loss": 2.0855, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.008535609495865563, | |
| "grad_norm": 14.154369354248047, | |
| "learning_rate": 2.6e-07, | |
| "loss": 1.9557, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.00960256068284876, | |
| "grad_norm": 9.126311302185059, | |
| "learning_rate": 2.933333333333333e-07, | |
| "loss": 1.8661, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.010669511869831956, | |
| "grad_norm": 13.619270324707031, | |
| "learning_rate": 3.2666666666666663e-07, | |
| "loss": 1.8424, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.01173646305681515, | |
| "grad_norm": 9.875323295593262, | |
| "learning_rate": 3.6e-07, | |
| "loss": 1.6208, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.012803414243798347, | |
| "grad_norm": 11.143688201904297, | |
| "learning_rate": 3.933333333333333e-07, | |
| "loss": 1.5396, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.013870365430781541, | |
| "grad_norm": 10.829161643981934, | |
| "learning_rate": 4.266666666666667e-07, | |
| "loss": 1.4174, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.014937316617764738, | |
| "grad_norm": 7.393435001373291, | |
| "learning_rate": 4.6e-07, | |
| "loss": 1.1693, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.016004267804747934, | |
| "grad_norm": 4.182456016540527, | |
| "learning_rate": 4.933333333333333e-07, | |
| "loss": 1.1779, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.017071218991731127, | |
| "grad_norm": 8.4662504196167, | |
| "learning_rate": 5.266666666666666e-07, | |
| "loss": 1.1281, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.018138170178714323, | |
| "grad_norm": 4.043624401092529, | |
| "learning_rate": 5.6e-07, | |
| "loss": 1.0614, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.01920512136569752, | |
| "grad_norm": 3.8517227172851562, | |
| "learning_rate": 5.933333333333334e-07, | |
| "loss": 1.0066, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.020272072552680716, | |
| "grad_norm": 4.19964075088501, | |
| "learning_rate": 6.266666666666667e-07, | |
| "loss": 1.0659, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.021339023739663912, | |
| "grad_norm": 4.7087178230285645, | |
| "learning_rate": 6.6e-07, | |
| "loss": 0.9793, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.022405974926647105, | |
| "grad_norm": 4.864803314208984, | |
| "learning_rate": 6.933333333333333e-07, | |
| "loss": 0.9675, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.0234729261136303, | |
| "grad_norm": 3.8939764499664307, | |
| "learning_rate": 7.266666666666667e-07, | |
| "loss": 0.9513, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.024539877300613498, | |
| "grad_norm": 3.557497978210449, | |
| "learning_rate": 7.599999999999999e-07, | |
| "loss": 0.9164, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.025606828487596694, | |
| "grad_norm": 4.033596038818359, | |
| "learning_rate": 7.933333333333333e-07, | |
| "loss": 0.9069, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.026673779674579887, | |
| "grad_norm": 4.726081371307373, | |
| "learning_rate": 8.266666666666667e-07, | |
| "loss": 0.8976, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.027740730861563083, | |
| "grad_norm": 18.711572647094727, | |
| "learning_rate": 8.599999999999999e-07, | |
| "loss": 0.9157, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.02880768204854628, | |
| "grad_norm": 4.13236141204834, | |
| "learning_rate": 8.933333333333333e-07, | |
| "loss": 0.8808, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.029874633235529476, | |
| "grad_norm": 3.7335939407348633, | |
| "learning_rate": 9.266666666666665e-07, | |
| "loss": 0.7707, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.03094158442251267, | |
| "grad_norm": 4.5814313888549805, | |
| "learning_rate": 9.6e-07, | |
| "loss": 0.8349, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.03200853560949587, | |
| "grad_norm": 4.993039608001709, | |
| "learning_rate": 9.933333333333333e-07, | |
| "loss": 0.7002, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.033075486796479064, | |
| "grad_norm": 5.326510906219482, | |
| "learning_rate": 9.996791443850267e-07, | |
| "loss": 0.6418, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.034142437983462254, | |
| "grad_norm": 4.4001994132995605, | |
| "learning_rate": 9.99144385026738e-07, | |
| "loss": 0.5632, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.03520938917044545, | |
| "grad_norm": 71.39772033691406, | |
| "learning_rate": 9.98716577540107e-07, | |
| "loss": 0.6374, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.036276340357428646, | |
| "grad_norm": 9.561308860778809, | |
| "learning_rate": 9.98288770053476e-07, | |
| "loss": 0.5493, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.03734329154441184, | |
| "grad_norm": 2.866039991378784, | |
| "learning_rate": 9.97754010695187e-07, | |
| "loss": 0.473, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.03841024273139504, | |
| "grad_norm": 3.0788004398345947, | |
| "learning_rate": 9.972192513368983e-07, | |
| "loss": 0.4903, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.039477193918378235, | |
| "grad_norm": 3.4059643745422363, | |
| "learning_rate": 9.966844919786097e-07, | |
| "loss": 0.5017, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.04054414510536143, | |
| "grad_norm": 2.9747097492218018, | |
| "learning_rate": 9.961497326203208e-07, | |
| "loss": 0.4269, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.04161109629234463, | |
| "grad_norm": 3.476583242416382, | |
| "learning_rate": 9.95614973262032e-07, | |
| "loss": 0.4214, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.042678047479327824, | |
| "grad_norm": 3.0810964107513428, | |
| "learning_rate": 9.950802139037432e-07, | |
| "loss": 0.4291, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.043744998666311014, | |
| "grad_norm": 87.03646850585938, | |
| "learning_rate": 9.945454545454544e-07, | |
| "loss": 0.4035, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.04481194985329421, | |
| "grad_norm": 2.6879165172576904, | |
| "learning_rate": 9.940106951871658e-07, | |
| "loss": 0.428, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.045878901040277406, | |
| "grad_norm": 3.602217197418213, | |
| "learning_rate": 9.93475935828877e-07, | |
| "loss": 0.4534, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.0469458522272606, | |
| "grad_norm": 2.9568989276885986, | |
| "learning_rate": 9.929411764705881e-07, | |
| "loss": 0.42, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.0480128034142438, | |
| "grad_norm": 2.7602076530456543, | |
| "learning_rate": 9.924064171122995e-07, | |
| "loss": 0.448, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.049079754601226995, | |
| "grad_norm": 2.7191200256347656, | |
| "learning_rate": 9.918716577540107e-07, | |
| "loss": 0.4046, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.05014670578821019, | |
| "grad_norm": 2.7233424186706543, | |
| "learning_rate": 9.913368983957219e-07, | |
| "loss": 0.4028, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.05121365697519339, | |
| "grad_norm": 2.730400323867798, | |
| "learning_rate": 9.90802139037433e-07, | |
| "loss": 0.4361, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.05228060816217658, | |
| "grad_norm": 4.667561054229736, | |
| "learning_rate": 9.902673796791442e-07, | |
| "loss": 0.4183, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.05334755934915977, | |
| "grad_norm": 2.981497049331665, | |
| "learning_rate": 9.897326203208556e-07, | |
| "loss": 0.4148, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.05441451053614297, | |
| "grad_norm": 9.336112976074219, | |
| "learning_rate": 9.891978609625668e-07, | |
| "loss": 0.4386, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.055481461723126166, | |
| "grad_norm": 4.434041500091553, | |
| "learning_rate": 9.88663101604278e-07, | |
| "loss": 0.4235, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.05654841291010936, | |
| "grad_norm": 2.695866346359253, | |
| "learning_rate": 9.881283422459892e-07, | |
| "loss": 0.4424, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.05761536409709256, | |
| "grad_norm": 2.8247873783111572, | |
| "learning_rate": 9.875935828877004e-07, | |
| "loss": 0.387, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.058682315284075755, | |
| "grad_norm": 3.4680376052856445, | |
| "learning_rate": 9.870588235294118e-07, | |
| "loss": 0.4294, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.05974926647105895, | |
| "grad_norm": 2.6994473934173584, | |
| "learning_rate": 9.86524064171123e-07, | |
| "loss": 0.4043, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.06081621765804215, | |
| "grad_norm": 2.6048827171325684, | |
| "learning_rate": 9.859893048128341e-07, | |
| "loss": 0.4064, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.06188316884502534, | |
| "grad_norm": 3.028503179550171, | |
| "learning_rate": 9.854545454545455e-07, | |
| "loss": 0.4269, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.06295012003200853, | |
| "grad_norm": 3.3406739234924316, | |
| "learning_rate": 9.849197860962567e-07, | |
| "loss": 0.4339, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.06401707121899174, | |
| "grad_norm": 3.1800878047943115, | |
| "learning_rate": 9.843850267379679e-07, | |
| "loss": 0.3972, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.06508402240597493, | |
| "grad_norm": 2.818852424621582, | |
| "learning_rate": 9.83850267379679e-07, | |
| "loss": 0.3898, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.06615097359295813, | |
| "grad_norm": 3.256840705871582, | |
| "learning_rate": 9.833155080213902e-07, | |
| "loss": 0.4377, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.06721792477994132, | |
| "grad_norm": 3.388472318649292, | |
| "learning_rate": 9.827807486631016e-07, | |
| "loss": 0.4187, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.06828487596692451, | |
| "grad_norm": 4.434413909912109, | |
| "learning_rate": 9.822459893048128e-07, | |
| "loss": 0.4009, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.06935182715390771, | |
| "grad_norm": 3.08677077293396, | |
| "learning_rate": 9.81711229946524e-07, | |
| "loss": 0.3837, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.0704187783408909, | |
| "grad_norm": 3.7852623462677, | |
| "learning_rate": 9.811764705882352e-07, | |
| "loss": 0.3999, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.0714857295278741, | |
| "grad_norm": 2.947720766067505, | |
| "learning_rate": 9.806417112299463e-07, | |
| "loss": 0.3677, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.07255268071485729, | |
| "grad_norm": 3.1685659885406494, | |
| "learning_rate": 9.801069518716577e-07, | |
| "loss": 0.4035, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.0736196319018405, | |
| "grad_norm": 2.661599636077881, | |
| "learning_rate": 9.79572192513369e-07, | |
| "loss": 0.3658, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.07468658308882369, | |
| "grad_norm": 2.6443653106689453, | |
| "learning_rate": 9.7903743315508e-07, | |
| "loss": 0.3805, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.07575353427580689, | |
| "grad_norm": 3.653778314590454, | |
| "learning_rate": 9.785026737967915e-07, | |
| "loss": 0.407, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.07682048546279008, | |
| "grad_norm": 2.991504430770874, | |
| "learning_rate": 9.779679144385027e-07, | |
| "loss": 0.4135, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.07788743664977327, | |
| "grad_norm": 2.7770955562591553, | |
| "learning_rate": 9.774331550802139e-07, | |
| "loss": 0.3934, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.07895438783675647, | |
| "grad_norm": 2.686368703842163, | |
| "learning_rate": 9.76898395721925e-07, | |
| "loss": 0.409, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.08002133902373966, | |
| "grad_norm": 3.307638168334961, | |
| "learning_rate": 9.763636363636362e-07, | |
| "loss": 0.3872, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.08108829021072286, | |
| "grad_norm": 8.76164722442627, | |
| "learning_rate": 9.758288770053476e-07, | |
| "loss": 0.3882, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.08215524139770605, | |
| "grad_norm": 2.868077039718628, | |
| "learning_rate": 9.752941176470588e-07, | |
| "loss": 0.4123, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.08322219258468926, | |
| "grad_norm": 4.677827835083008, | |
| "learning_rate": 9.7475935828877e-07, | |
| "loss": 0.362, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.08428914377167245, | |
| "grad_norm": 3.836914539337158, | |
| "learning_rate": 9.742245989304812e-07, | |
| "loss": 0.4023, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.08535609495865565, | |
| "grad_norm": 2.7120203971862793, | |
| "learning_rate": 9.736898395721923e-07, | |
| "loss": 0.419, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.08642304614563884, | |
| "grad_norm": 3.1341233253479004, | |
| "learning_rate": 9.731550802139037e-07, | |
| "loss": 0.4275, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.08748999733262203, | |
| "grad_norm": 3.0901923179626465, | |
| "learning_rate": 9.72620320855615e-07, | |
| "loss": 0.3799, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.08855694851960523, | |
| "grad_norm": 2.7124781608581543, | |
| "learning_rate": 9.72085561497326e-07, | |
| "loss": 0.3788, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.08962389970658842, | |
| "grad_norm": 2.8177292346954346, | |
| "learning_rate": 9.715508021390375e-07, | |
| "loss": 0.3436, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.09069085089357162, | |
| "grad_norm": 3.063669204711914, | |
| "learning_rate": 9.710160427807487e-07, | |
| "loss": 0.4312, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.09175780208055481, | |
| "grad_norm": 2.78320574760437, | |
| "learning_rate": 9.704812834224598e-07, | |
| "loss": 0.4122, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.09282475326753802, | |
| "grad_norm": 3.080400228500366, | |
| "learning_rate": 9.69946524064171e-07, | |
| "loss": 0.3917, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.0938917044545212, | |
| "grad_norm": 3.02703857421875, | |
| "learning_rate": 9.694117647058822e-07, | |
| "loss": 0.3546, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.09495865564150441, | |
| "grad_norm": 2.6559834480285645, | |
| "learning_rate": 9.688770053475936e-07, | |
| "loss": 0.3536, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.0960256068284876, | |
| "grad_norm": 3.939589738845825, | |
| "learning_rate": 9.683422459893048e-07, | |
| "loss": 0.3729, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.09709255801547079, | |
| "grad_norm": 2.419799327850342, | |
| "learning_rate": 9.67807486631016e-07, | |
| "loss": 0.3474, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.09815950920245399, | |
| "grad_norm": 2.237165927886963, | |
| "learning_rate": 9.672727272727271e-07, | |
| "loss": 0.3387, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.09922646038943718, | |
| "grad_norm": 2.6400506496429443, | |
| "learning_rate": 9.667379679144385e-07, | |
| "loss": 0.3462, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.10029341157642038, | |
| "grad_norm": 3.294222354888916, | |
| "learning_rate": 9.662032085561497e-07, | |
| "loss": 0.3724, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.10136036276340357, | |
| "grad_norm": 2.5825111865997314, | |
| "learning_rate": 9.656684491978609e-07, | |
| "loss": 0.3652, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.10242731395038678, | |
| "grad_norm": 2.6807165145874023, | |
| "learning_rate": 9.65133689839572e-07, | |
| "loss": 0.3824, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.10349426513736996, | |
| "grad_norm": 2.541398048400879, | |
| "learning_rate": 9.645989304812835e-07, | |
| "loss": 0.3509, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.10456121632435315, | |
| "grad_norm": 3.071420192718506, | |
| "learning_rate": 9.640641711229946e-07, | |
| "loss": 0.3511, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.10562816751133636, | |
| "grad_norm": 3.117623805999756, | |
| "learning_rate": 9.635294117647058e-07, | |
| "loss": 0.3823, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.10669511869831955, | |
| "grad_norm": 2.9759390354156494, | |
| "learning_rate": 9.62994652406417e-07, | |
| "loss": 0.3595, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.10669511869831955, | |
| "eval_bleu": 0.5858525890811711, | |
| "eval_cer": 5.81035140115651, | |
| "eval_loss": 0.35532379150390625, | |
| "eval_runtime": 1038.2775, | |
| "eval_samples_per_second": 0.884, | |
| "eval_steps_per_second": 0.222, | |
| "eval_wer": 22.283451007404857, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.10776206988530275, | |
| "grad_norm": 2.9231984615325928, | |
| "learning_rate": 9.475679090334807e-07, | |
| "loss": 0.3783, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.10882902107228594, | |
| "grad_norm": 2.2739880084991455, | |
| "learning_rate": 9.470414824173509e-07, | |
| "loss": 0.3238, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.10989597225926914, | |
| "grad_norm": 2.603306531906128, | |
| "learning_rate": 9.465150558012212e-07, | |
| "loss": 0.3538, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.11096292344625233, | |
| "grad_norm": 2.871011972427368, | |
| "learning_rate": 9.459886291850916e-07, | |
| "loss": 0.3955, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.11202987463323553, | |
| "grad_norm": 3.134646415710449, | |
| "learning_rate": 9.454622025689618e-07, | |
| "loss": 0.3508, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.11309682582021872, | |
| "grad_norm": 3.014796733856201, | |
| "learning_rate": 9.449357759528321e-07, | |
| "loss": 0.3664, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.11416377700720191, | |
| "grad_norm": 3.0675926208496094, | |
| "learning_rate": 9.444093493367024e-07, | |
| "loss": 0.3497, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.11523072819418512, | |
| "grad_norm": 4.508389472961426, | |
| "learning_rate": 9.438829227205727e-07, | |
| "loss": 0.3172, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.1162976793811683, | |
| "grad_norm": 2.528317451477051, | |
| "learning_rate": 9.43356496104443e-07, | |
| "loss": 0.3495, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.11736463056815151, | |
| "grad_norm": 2.759575843811035, | |
| "learning_rate": 9.428300694883133e-07, | |
| "loss": 0.3326, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.1184315817551347, | |
| "grad_norm": 6.699812889099121, | |
| "learning_rate": 9.423036428721835e-07, | |
| "loss": 0.4124, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 0.1194985329421179, | |
| "grad_norm": 2.358922243118286, | |
| "learning_rate": 9.417772162560539e-07, | |
| "loss": 0.3292, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.12056548412910109, | |
| "grad_norm": 2.3635692596435547, | |
| "learning_rate": 9.412507896399241e-07, | |
| "loss": 0.3715, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 0.1216324353160843, | |
| "grad_norm": 2.8696706295013428, | |
| "learning_rate": 9.407243630237944e-07, | |
| "loss": 0.3348, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.12269938650306748, | |
| "grad_norm": 3.717510223388672, | |
| "learning_rate": 9.401979364076647e-07, | |
| "loss": 0.355, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.12376633769005067, | |
| "grad_norm": 3.9285202026367188, | |
| "learning_rate": 9.39671509791535e-07, | |
| "loss": 0.3752, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.12483328887703388, | |
| "grad_norm": 3.560582399368286, | |
| "learning_rate": 9.391450831754053e-07, | |
| "loss": 0.3422, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 0.12590024006401707, | |
| "grad_norm": 6.333406925201416, | |
| "learning_rate": 9.386186565592757e-07, | |
| "loss": 0.3436, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.12696719125100026, | |
| "grad_norm": 2.94331431388855, | |
| "learning_rate": 9.380922299431458e-07, | |
| "loss": 0.3346, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 0.12803414243798347, | |
| "grad_norm": 2.933142900466919, | |
| "learning_rate": 9.375658033270162e-07, | |
| "loss": 0.3414, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.12910109362496666, | |
| "grad_norm": 3.2017970085144043, | |
| "learning_rate": 9.370393767108865e-07, | |
| "loss": 0.3731, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 0.13016804481194985, | |
| "grad_norm": 3.2127702236175537, | |
| "learning_rate": 9.365129500947567e-07, | |
| "loss": 0.3399, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.13123499599893304, | |
| "grad_norm": 3.1433818340301514, | |
| "learning_rate": 9.35986523478627e-07, | |
| "loss": 0.342, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 0.13230194718591626, | |
| "grad_norm": 3.0913007259368896, | |
| "learning_rate": 9.354600968624973e-07, | |
| "loss": 0.3543, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.13336889837289945, | |
| "grad_norm": 3.459428310394287, | |
| "learning_rate": 9.349336702463676e-07, | |
| "loss": 0.292, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.13443584955988264, | |
| "grad_norm": 2.965162754058838, | |
| "learning_rate": 9.34407243630238e-07, | |
| "loss": 0.3591, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.13550280074686583, | |
| "grad_norm": 2.476099967956543, | |
| "learning_rate": 9.338808170141081e-07, | |
| "loss": 0.3351, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 0.13656975193384902, | |
| "grad_norm": 2.853848695755005, | |
| "learning_rate": 9.333543903979785e-07, | |
| "loss": 0.3828, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.13763670312083223, | |
| "grad_norm": 2.560877561569214, | |
| "learning_rate": 9.328279637818488e-07, | |
| "loss": 0.3183, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 0.13870365430781542, | |
| "grad_norm": 2.7191262245178223, | |
| "learning_rate": 9.32301537165719e-07, | |
| "loss": 0.3572, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.1397706054947986, | |
| "grad_norm": 3.388456106185913, | |
| "learning_rate": 9.317751105495893e-07, | |
| "loss": 0.3207, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 0.1408375566817818, | |
| "grad_norm": 2.827470302581787, | |
| "learning_rate": 9.312486839334597e-07, | |
| "loss": 0.3576, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.14190450786876502, | |
| "grad_norm": 2.630094528198242, | |
| "learning_rate": 9.307222573173299e-07, | |
| "loss": 0.3616, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 0.1429714590557482, | |
| "grad_norm": 2.7217891216278076, | |
| "learning_rate": 9.301958307012003e-07, | |
| "loss": 0.3434, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.1440384102427314, | |
| "grad_norm": 2.558335065841675, | |
| "learning_rate": 9.296694040850705e-07, | |
| "loss": 0.3516, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.14510536142971459, | |
| "grad_norm": 3.1991679668426514, | |
| "learning_rate": 9.291429774689408e-07, | |
| "loss": 0.338, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.14617231261669777, | |
| "grad_norm": 4.686666011810303, | |
| "learning_rate": 9.286165508528111e-07, | |
| "loss": 0.3539, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 0.147239263803681, | |
| "grad_norm": 3.0826447010040283, | |
| "learning_rate": 9.280901242366814e-07, | |
| "loss": 0.3313, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.14830621499066418, | |
| "grad_norm": 2.656141757965088, | |
| "learning_rate": 9.275636976205516e-07, | |
| "loss": 0.3454, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 0.14937316617764737, | |
| "grad_norm": 4.859818935394287, | |
| "learning_rate": 9.27037271004422e-07, | |
| "loss": 0.3331, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.15044011736463056, | |
| "grad_norm": 5.376903057098389, | |
| "learning_rate": 9.265108443882922e-07, | |
| "loss": 0.3377, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 0.15150706855161378, | |
| "grad_norm": 2.7976577281951904, | |
| "learning_rate": 9.259844177721626e-07, | |
| "loss": 0.3768, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.15257401973859697, | |
| "grad_norm": 2.993427038192749, | |
| "learning_rate": 9.254579911560328e-07, | |
| "loss": 0.3509, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 0.15364097092558016, | |
| "grad_norm": 2.8581597805023193, | |
| "learning_rate": 9.249315645399031e-07, | |
| "loss": 0.3338, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.15470792211256335, | |
| "grad_norm": 2.8063721656799316, | |
| "learning_rate": 9.244051379237734e-07, | |
| "loss": 0.3597, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 0.15577487329954653, | |
| "grad_norm": 2.7333686351776123, | |
| "learning_rate": 9.238787113076438e-07, | |
| "loss": 0.3563, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.15684182448652975, | |
| "grad_norm": 3.4143598079681396, | |
| "learning_rate": 9.233522846915139e-07, | |
| "loss": 0.336, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 0.15790877567351294, | |
| "grad_norm": 2.84298038482666, | |
| "learning_rate": 9.228258580753843e-07, | |
| "loss": 0.3651, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.15897572686049613, | |
| "grad_norm": 5.912104606628418, | |
| "learning_rate": 9.222994314592545e-07, | |
| "loss": 0.3531, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 0.16004267804747932, | |
| "grad_norm": 3.0039525032043457, | |
| "learning_rate": 9.217730048431249e-07, | |
| "loss": 0.3212, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.16110962923446254, | |
| "grad_norm": 3.2109341621398926, | |
| "learning_rate": 9.212465782269951e-07, | |
| "loss": 0.3206, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 0.16217658042144573, | |
| "grad_norm": 2.7238008975982666, | |
| "learning_rate": 9.207201516108654e-07, | |
| "loss": 0.3696, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.16324353160842892, | |
| "grad_norm": 2.6420886516571045, | |
| "learning_rate": 9.201937249947357e-07, | |
| "loss": 0.3339, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 0.1643104827954121, | |
| "grad_norm": 2.8822038173675537, | |
| "learning_rate": 9.196672983786061e-07, | |
| "loss": 0.3611, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.1653774339823953, | |
| "grad_norm": 2.558979034423828, | |
| "learning_rate": 9.191408717624762e-07, | |
| "loss": 0.3247, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 0.1664443851693785, | |
| "grad_norm": 2.649867057800293, | |
| "learning_rate": 9.186144451463465e-07, | |
| "loss": 0.3146, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.1675113363563617, | |
| "grad_norm": 2.6174542903900146, | |
| "learning_rate": 9.180880185302169e-07, | |
| "loss": 0.3319, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 0.1685782875433449, | |
| "grad_norm": 2.65977144241333, | |
| "learning_rate": 9.175615919140871e-07, | |
| "loss": 0.3643, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.16964523873032808, | |
| "grad_norm": 3.2722222805023193, | |
| "learning_rate": 9.170351652979574e-07, | |
| "loss": 0.3349, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 0.1707121899173113, | |
| "grad_norm": 2.346200466156006, | |
| "learning_rate": 9.165087386818277e-07, | |
| "loss": 0.3028, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.17177914110429449, | |
| "grad_norm": 2.675050735473633, | |
| "learning_rate": 9.15982312065698e-07, | |
| "loss": 0.3256, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 0.17284609229127768, | |
| "grad_norm": 2.4576005935668945, | |
| "learning_rate": 9.154558854495683e-07, | |
| "loss": 0.3127, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.17391304347826086, | |
| "grad_norm": 2.73311710357666, | |
| "learning_rate": 9.150347441566645e-07, | |
| "loss": 0.3508, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 0.17497999466524405, | |
| "grad_norm": 2.6573374271392822, | |
| "learning_rate": 9.145083175405348e-07, | |
| "loss": 0.3643, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.17604694585222727, | |
| "grad_norm": 2.7585701942443848, | |
| "learning_rate": 9.139818909244052e-07, | |
| "loss": 0.3321, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 0.17711389703921046, | |
| "grad_norm": 2.8242616653442383, | |
| "learning_rate": 9.134554643082753e-07, | |
| "loss": 0.3175, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.17818084822619365, | |
| "grad_norm": 3.1244609355926514, | |
| "learning_rate": 9.129290376921457e-07, | |
| "loss": 0.3641, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 0.17924779941317684, | |
| "grad_norm": 2.4807212352752686, | |
| "learning_rate": 9.12402611076016e-07, | |
| "loss": 0.3464, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.18031475060016006, | |
| "grad_norm": 4.587203502655029, | |
| "learning_rate": 9.118761844598863e-07, | |
| "loss": 0.3199, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 0.18138170178714325, | |
| "grad_norm": 3.1150426864624023, | |
| "learning_rate": 9.113497578437566e-07, | |
| "loss": 0.3129, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.18244865297412644, | |
| "grad_norm": 2.80098557472229, | |
| "learning_rate": 9.108233312276268e-07, | |
| "loss": 0.3296, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 0.18351560416110962, | |
| "grad_norm": 2.6048800945281982, | |
| "learning_rate": 9.102969046114971e-07, | |
| "loss": 0.3426, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.1845825553480928, | |
| "grad_norm": 2.4776954650878906, | |
| "learning_rate": 9.097704779953675e-07, | |
| "loss": 0.3389, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 0.18564950653507603, | |
| "grad_norm": 3.662856101989746, | |
| "learning_rate": 9.092440513792377e-07, | |
| "loss": 0.3191, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.18671645772205922, | |
| "grad_norm": 5.855990886688232, | |
| "learning_rate": 9.08717624763108e-07, | |
| "loss": 0.3491, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 0.1877834089090424, | |
| "grad_norm": 2.6081950664520264, | |
| "learning_rate": 9.081911981469783e-07, | |
| "loss": 0.3256, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.1888503600960256, | |
| "grad_norm": 3.206470251083374, | |
| "learning_rate": 9.076647715308485e-07, | |
| "loss": 0.3419, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 0.18991731128300882, | |
| "grad_norm": 3.7217066287994385, | |
| "learning_rate": 9.071383449147189e-07, | |
| "loss": 0.3369, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.190984262469992, | |
| "grad_norm": 4.7247633934021, | |
| "learning_rate": 9.066119182985891e-07, | |
| "loss": 0.3664, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 0.1920512136569752, | |
| "grad_norm": 3.455446481704712, | |
| "learning_rate": 9.060854916824594e-07, | |
| "loss": 0.3171, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.19311816484395838, | |
| "grad_norm": 2.6066224575042725, | |
| "learning_rate": 9.055590650663297e-07, | |
| "loss": 0.3266, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 0.19418511603094157, | |
| "grad_norm": 3.2545228004455566, | |
| "learning_rate": 9.050326384502e-07, | |
| "loss": 0.3201, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.1952520672179248, | |
| "grad_norm": 3.8174829483032227, | |
| "learning_rate": 9.045062118340702e-07, | |
| "loss": 0.3537, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 0.19631901840490798, | |
| "grad_norm": 3.725991725921631, | |
| "learning_rate": 9.039797852179406e-07, | |
| "loss": 0.3373, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.19738596959189117, | |
| "grad_norm": 2.5402047634124756, | |
| "learning_rate": 9.034533586018108e-07, | |
| "loss": 0.3107, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 0.19845292077887436, | |
| "grad_norm": 2.3887853622436523, | |
| "learning_rate": 9.029269319856812e-07, | |
| "loss": 0.3255, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.19951987196585758, | |
| "grad_norm": 2.83178973197937, | |
| "learning_rate": 9.024005053695514e-07, | |
| "loss": 0.3192, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 0.20058682315284077, | |
| "grad_norm": 2.540933132171631, | |
| "learning_rate": 9.018740787534217e-07, | |
| "loss": 0.3414, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.20165377433982395, | |
| "grad_norm": 2.799577474594116, | |
| "learning_rate": 9.01347652137292e-07, | |
| "loss": 0.3383, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 0.20272072552680714, | |
| "grad_norm": 2.5945661067962646, | |
| "learning_rate": 9.008212255211624e-07, | |
| "loss": 0.3414, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.20378767671379033, | |
| "grad_norm": 3.102372407913208, | |
| "learning_rate": 9.002947989050325e-07, | |
| "loss": 0.33, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 0.20485462790077355, | |
| "grad_norm": 3.1992921829223633, | |
| "learning_rate": 8.997683722889029e-07, | |
| "loss": 0.3719, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.20592157908775674, | |
| "grad_norm": 3.082578420639038, | |
| "learning_rate": 8.992419456727732e-07, | |
| "loss": 0.3412, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 0.20698853027473993, | |
| "grad_norm": 2.406475067138672, | |
| "learning_rate": 8.987155190566435e-07, | |
| "loss": 0.3069, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.20805548146172312, | |
| "grad_norm": 3.033055067062378, | |
| "learning_rate": 8.981890924405137e-07, | |
| "loss": 0.3683, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 0.2091224326487063, | |
| "grad_norm": 2.807874917984009, | |
| "learning_rate": 8.97662665824384e-07, | |
| "loss": 0.3024, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.21018938383568952, | |
| "grad_norm": 2.6849870681762695, | |
| "learning_rate": 8.971362392082543e-07, | |
| "loss": 0.3248, | |
| "step": 985 | |
| }, | |
| { | |
| "epoch": 0.21125633502267271, | |
| "grad_norm": 3.144150495529175, | |
| "learning_rate": 8.966098125921247e-07, | |
| "loss": 0.3158, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.2123232862096559, | |
| "grad_norm": 2.495368003845215, | |
| "learning_rate": 8.960833859759948e-07, | |
| "loss": 0.312, | |
| "step": 995 | |
| }, | |
| { | |
| "epoch": 0.2133902373966391, | |
| "grad_norm": 2.2597124576568604, | |
| "learning_rate": 8.955569593598652e-07, | |
| "loss": 0.3053, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.2133902373966391, | |
| "eval_bleu": 0.6173395426659279, | |
| "eval_cer": 5.420346953040605, | |
| "eval_loss": 0.3327235281467438, | |
| "eval_runtime": 1022.0953, | |
| "eval_samples_per_second": 0.898, | |
| "eval_steps_per_second": 0.225, | |
| "eval_wer": 20.46380804775845, | |
| "step": 1000 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 9500, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.358829453312e+20, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |