| { |
| "best_global_step": 1529, |
| "best_metric": 0.15474164485931396, |
| "best_model_checkpoint": "./results_ner_lora_entity_aware/checkpoint-1529", |
| "epoch": 1.4903330625507718, |
| "eval_steps": 1529, |
| "global_step": 4587, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.003249390739236393, |
| "grad_norm": 0.8849583864212036, |
| "learning_rate": 1.461038961038961e-06, |
| "loss": 0.2156, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.006498781478472786, |
| "grad_norm": 0.865159809589386, |
| "learning_rate": 3.0844155844155847e-06, |
| "loss": 0.2016, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.00974817221770918, |
| "grad_norm": 0.8396425247192383, |
| "learning_rate": 4.707792207792208e-06, |
| "loss": 0.174, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.012997562956945572, |
| "grad_norm": 0.83138507604599, |
| "learning_rate": 6.331168831168831e-06, |
| "loss": 0.1586, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.016246953696181964, |
| "grad_norm": 0.8232032060623169, |
| "learning_rate": 7.954545454545455e-06, |
| "loss": 0.1361, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.01949634443541836, |
| "grad_norm": 0.9345568418502808, |
| "learning_rate": 9.577922077922078e-06, |
| "loss": 0.1184, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.022745735174654752, |
| "grad_norm": 1.0010511875152588, |
| "learning_rate": 1.1201298701298701e-05, |
| "loss": 0.1081, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.025995125913891144, |
| "grad_norm": 1.1553717851638794, |
| "learning_rate": 1.2824675324675326e-05, |
| "loss": 0.1003, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.02924451665312754, |
| "grad_norm": 0.9876243472099304, |
| "learning_rate": 1.4448051948051949e-05, |
| "loss": 0.0857, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.03249390739236393, |
| "grad_norm": 0.9326199293136597, |
| "learning_rate": 1.6071428571428572e-05, |
| "loss": 0.0828, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.03574329813160033, |
| "grad_norm": 0.9165641665458679, |
| "learning_rate": 1.7694805194805193e-05, |
| "loss": 0.0862, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.03899268887083672, |
| "grad_norm": 1.2718145847320557, |
| "learning_rate": 1.9318181818181818e-05, |
| "loss": 0.0833, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.04224207961007311, |
| "grad_norm": 1.0397502183914185, |
| "learning_rate": 2.0941558441558443e-05, |
| "loss": 0.0765, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.045491470349309504, |
| "grad_norm": 0.9659145474433899, |
| "learning_rate": 2.2564935064935067e-05, |
| "loss": 0.0733, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.048740861088545896, |
| "grad_norm": 0.8223243355751038, |
| "learning_rate": 2.4188311688311692e-05, |
| "loss": 0.0742, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.05199025182778229, |
| "grad_norm": 0.7280202507972717, |
| "learning_rate": 2.5811688311688314e-05, |
| "loss": 0.0739, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.05523964256701868, |
| "grad_norm": 0.8244697451591492, |
| "learning_rate": 2.7435064935064935e-05, |
| "loss": 0.0678, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.05848903330625508, |
| "grad_norm": 0.633224368095398, |
| "learning_rate": 2.9058441558441563e-05, |
| "loss": 0.0667, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.06173842404549147, |
| "grad_norm": 1.4761995077133179, |
| "learning_rate": 3.068181818181818e-05, |
| "loss": 0.0676, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.06498781478472786, |
| "grad_norm": 0.9232731461524963, |
| "learning_rate": 3.2305194805194806e-05, |
| "loss": 0.0714, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.06823720552396426, |
| "grad_norm": 0.7005584239959717, |
| "learning_rate": 3.392857142857143e-05, |
| "loss": 0.0649, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.07148659626320066, |
| "grad_norm": 1.1675697565078735, |
| "learning_rate": 3.5551948051948055e-05, |
| "loss": 0.0661, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.07473598700243704, |
| "grad_norm": 0.5797019004821777, |
| "learning_rate": 3.717532467532468e-05, |
| "loss": 0.067, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.07798537774167344, |
| "grad_norm": 0.5873407125473022, |
| "learning_rate": 3.87987012987013e-05, |
| "loss": 0.0615, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.08123476848090982, |
| "grad_norm": 0.7156246900558472, |
| "learning_rate": 4.042207792207792e-05, |
| "loss": 0.0675, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.08448415922014622, |
| "grad_norm": 0.6001958847045898, |
| "learning_rate": 4.204545454545455e-05, |
| "loss": 0.0609, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.08773354995938261, |
| "grad_norm": 0.5709946751594543, |
| "learning_rate": 4.366883116883117e-05, |
| "loss": 0.0616, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.09098294069861901, |
| "grad_norm": 0.5521292686462402, |
| "learning_rate": 4.52922077922078e-05, |
| "loss": 0.0635, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.09423233143785541, |
| "grad_norm": 0.877484917640686, |
| "learning_rate": 4.691558441558442e-05, |
| "loss": 0.0614, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.09748172217709179, |
| "grad_norm": 0.6828113794326782, |
| "learning_rate": 4.853896103896104e-05, |
| "loss": 0.0605, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.10073111291632819, |
| "grad_norm": 0.5761043429374695, |
| "learning_rate": 4.999999639259403e-05, |
| "loss": 0.062, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.10398050365556458, |
| "grad_norm": 0.6017851829528809, |
| "learning_rate": 4.999956350513694e-05, |
| "loss": 0.0629, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.10722989439480098, |
| "grad_norm": 0.4347151517868042, |
| "learning_rate": 4.999840915079987e-05, |
| "loss": 0.06, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.11047928513403736, |
| "grad_norm": 0.49241673946380615, |
| "learning_rate": 4.9996533362896526e-05, |
| "loss": 0.0625, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.11372867587327376, |
| "grad_norm": 0.45582103729248047, |
| "learning_rate": 4.999393619556062e-05, |
| "loss": 0.0599, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.11697806661251016, |
| "grad_norm": 0.4913433790206909, |
| "learning_rate": 4.999061772374426e-05, |
| "loss": 0.062, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.12022745735174654, |
| "grad_norm": 0.47562339901924133, |
| "learning_rate": 4.998657804321582e-05, |
| "loss": 0.0599, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.12347684809098294, |
| "grad_norm": 0.5014773607254028, |
| "learning_rate": 4.9981817270557166e-05, |
| "loss": 0.0538, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.12672623883021933, |
| "grad_norm": 0.5543435215950012, |
| "learning_rate": 4.9976335543160284e-05, |
| "loss": 0.0628, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.12997562956945571, |
| "grad_norm": 0.5599291920661926, |
| "learning_rate": 4.997013301922333e-05, |
| "loss": 0.0567, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.13322502030869213, |
| "grad_norm": 0.46465206146240234, |
| "learning_rate": 4.996320987774606e-05, |
| "loss": 0.0612, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.1364744110479285, |
| "grad_norm": 0.5900648236274719, |
| "learning_rate": 4.995556631852464e-05, |
| "loss": 0.0536, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.1397238017871649, |
| "grad_norm": 0.402570903301239, |
| "learning_rate": 4.9947202562145925e-05, |
| "loss": 0.0564, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.1429731925264013, |
| "grad_norm": 0.45116308331489563, |
| "learning_rate": 4.9938118849981054e-05, |
| "loss": 0.0542, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.1462225832656377, |
| "grad_norm": 0.41757047176361084, |
| "learning_rate": 4.9928315444178496e-05, |
| "loss": 0.0527, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.14947197400487408, |
| "grad_norm": 0.5318567156791687, |
| "learning_rate": 4.991779262765651e-05, |
| "loss": 0.0533, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.15272136474411047, |
| "grad_norm": 0.44866418838500977, |
| "learning_rate": 4.9906550704094935e-05, |
| "loss": 0.0544, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.15597075548334688, |
| "grad_norm": 0.4166482985019684, |
| "learning_rate": 4.989458999792645e-05, |
| "loss": 0.0541, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.15922014622258326, |
| "grad_norm": 0.4640562832355499, |
| "learning_rate": 4.988191085432722e-05, |
| "loss": 0.0544, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.16246953696181965, |
| "grad_norm": 0.4704243242740631, |
| "learning_rate": 4.986851363920693e-05, |
| "loss": 0.0519, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.16571892770105606, |
| "grad_norm": 0.5697548985481262, |
| "learning_rate": 4.9854398739198195e-05, |
| "loss": 0.0527, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.16896831844029245, |
| "grad_norm": 0.5503994822502136, |
| "learning_rate": 4.983956656164545e-05, |
| "loss": 0.0502, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.17221770917952883, |
| "grad_norm": 0.6677025556564331, |
| "learning_rate": 4.982401753459317e-05, |
| "loss": 0.0563, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.17546709991876522, |
| "grad_norm": 0.39828023314476013, |
| "learning_rate": 4.98077521067735e-05, |
| "loss": 0.0557, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.17871649065800163, |
| "grad_norm": 0.40971750020980835, |
| "learning_rate": 4.979077074759333e-05, |
| "loss": 0.0507, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.18196588139723802, |
| "grad_norm": 0.44697558879852295, |
| "learning_rate": 4.9773073947120765e-05, |
| "loss": 0.0533, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.1852152721364744, |
| "grad_norm": 0.5169378519058228, |
| "learning_rate": 4.975466221607092e-05, |
| "loss": 0.0511, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.18846466287571081, |
| "grad_norm": 0.43874937295913696, |
| "learning_rate": 4.973553608579125e-05, |
| "loss": 0.0508, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.1917140536149472, |
| "grad_norm": 0.3906107246875763, |
| "learning_rate": 4.971569610824616e-05, |
| "loss": 0.0558, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.19496344435418358, |
| "grad_norm": 0.4098195433616638, |
| "learning_rate": 4.969514285600113e-05, |
| "loss": 0.0545, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.19821283509341997, |
| "grad_norm": 0.39674079418182373, |
| "learning_rate": 4.967387692220614e-05, |
| "loss": 0.0487, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.20146222583265638, |
| "grad_norm": 0.4392511248588562, |
| "learning_rate": 4.965189892057859e-05, |
| "loss": 0.0487, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.20471161657189277, |
| "grad_norm": 0.36493393778800964, |
| "learning_rate": 4.962920948538555e-05, |
| "loss": 0.0532, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.20796100731112915, |
| "grad_norm": 0.44200050830841064, |
| "learning_rate": 4.9605809271425504e-05, |
| "loss": 0.0492, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.21121039805036557, |
| "grad_norm": 0.42539969086647034, |
| "learning_rate": 4.9581698954009415e-05, |
| "loss": 0.051, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.21445978878960195, |
| "grad_norm": 0.39299121499061584, |
| "learning_rate": 4.9556879228941244e-05, |
| "loss": 0.0507, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.21770917952883834, |
| "grad_norm": 0.36381906270980835, |
| "learning_rate": 4.95313508124979e-05, |
| "loss": 0.0502, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.22095857026807472, |
| "grad_norm": 0.5863086581230164, |
| "learning_rate": 4.950511444140849e-05, |
| "loss": 0.05, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.22420796100731114, |
| "grad_norm": 0.35644617676734924, |
| "learning_rate": 4.9478170872833186e-05, |
| "loss": 0.0521, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.22745735174654752, |
| "grad_norm": 0.40089258551597595, |
| "learning_rate": 4.945052088434124e-05, |
| "loss": 0.0461, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.2307067424857839, |
| "grad_norm": 0.3229173421859741, |
| "learning_rate": 4.942216527388861e-05, |
| "loss": 0.0494, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.23395613322502032, |
| "grad_norm": 0.44354256987571716, |
| "learning_rate": 4.939310485979495e-05, |
| "loss": 0.0474, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.2372055239642567, |
| "grad_norm": 0.4226253926753998, |
| "learning_rate": 4.9363340480719954e-05, |
| "loss": 0.0493, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.2404549147034931, |
| "grad_norm": 0.4496769905090332, |
| "learning_rate": 4.9332872995639165e-05, |
| "loss": 0.0442, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.2437043054427295, |
| "grad_norm": 0.41821110248565674, |
| "learning_rate": 4.930170328381919e-05, |
| "loss": 0.0497, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.2469536961819659, |
| "grad_norm": 0.32461634278297424, |
| "learning_rate": 4.9269832244792327e-05, |
| "loss": 0.0449, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.2502030869212023, |
| "grad_norm": 0.4358930289745331, |
| "learning_rate": 4.9237260798330593e-05, |
| "loss": 0.0448, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.25345247766043866, |
| "grad_norm": 0.41930267214775085, |
| "learning_rate": 4.92039898844192e-05, |
| "loss": 0.0468, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.25670186839967507, |
| "grad_norm": 0.4221108853816986, |
| "learning_rate": 4.91700204632294e-05, |
| "loss": 0.0439, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.25995125913891143, |
| "grad_norm": 0.3895890712738037, |
| "learning_rate": 4.913535351509082e-05, |
| "loss": 0.0455, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.26320064987814784, |
| "grad_norm": 0.36097270250320435, |
| "learning_rate": 4.9099990040463116e-05, |
| "loss": 0.0457, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.26645004061738425, |
| "grad_norm": 0.32470473647117615, |
| "learning_rate": 4.906393105990713e-05, |
| "loss": 0.0461, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.2696994313566206, |
| "grad_norm": 0.3427784740924835, |
| "learning_rate": 4.9027177614055445e-05, |
| "loss": 0.0479, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.272948822095857, |
| "grad_norm": 0.47217708826065063, |
| "learning_rate": 4.898973076358233e-05, |
| "loss": 0.0465, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.27619821283509344, |
| "grad_norm": 0.36148127913475037, |
| "learning_rate": 4.8951591589173156e-05, |
| "loss": 0.0438, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.2794476035743298, |
| "grad_norm": 0.4368058145046234, |
| "learning_rate": 4.891276119149318e-05, |
| "loss": 0.0415, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.2826969943135662, |
| "grad_norm": 0.42918527126312256, |
| "learning_rate": 4.887324069115581e-05, |
| "loss": 0.0446, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.2859463850528026, |
| "grad_norm": 0.3209812641143799, |
| "learning_rate": 4.883303122869024e-05, |
| "loss": 0.0465, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.289195775792039, |
| "grad_norm": 0.44423332810401917, |
| "learning_rate": 4.879213396450854e-05, |
| "loss": 0.0497, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.2924451665312754, |
| "grad_norm": 0.4171295762062073, |
| "learning_rate": 4.8750550078872195e-05, |
| "loss": 0.0485, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.2956945572705118, |
| "grad_norm": 0.27637121081352234, |
| "learning_rate": 4.8708280771858e-05, |
| "loss": 0.046, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.29894394800974816, |
| "grad_norm": 0.3019394874572754, |
| "learning_rate": 4.8665327263323445e-05, |
| "loss": 0.045, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.3021933387489846, |
| "grad_norm": 0.39697766304016113, |
| "learning_rate": 4.862169079287154e-05, |
| "loss": 0.0415, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.30544272948822093, |
| "grad_norm": 0.4019138813018799, |
| "learning_rate": 4.8577372619815e-05, |
| "loss": 0.0433, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.30869212022745735, |
| "grad_norm": 0.2970142960548401, |
| "learning_rate": 4.85323740231399e-05, |
| "loss": 0.0482, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.31194151096669376, |
| "grad_norm": 0.3531886041164398, |
| "learning_rate": 4.848669630146882e-05, |
| "loss": 0.0439, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.3151909017059301, |
| "grad_norm": 0.5152345895767212, |
| "learning_rate": 4.8440340773023295e-05, |
| "loss": 0.0408, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.31844029244516653, |
| "grad_norm": 0.3633795976638794, |
| "learning_rate": 4.839330877558583e-05, |
| "loss": 0.0436, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.32168968318440294, |
| "grad_norm": 0.43122872710227966, |
| "learning_rate": 4.834560166646126e-05, |
| "loss": 0.0438, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.3249390739236393, |
| "grad_norm": 0.2737273871898651, |
| "learning_rate": 4.8297220822437594e-05, |
| "loss": 0.0424, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.3281884646628757, |
| "grad_norm": 0.36643481254577637, |
| "learning_rate": 4.824816763974626e-05, |
| "loss": 0.0408, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.3314378554021121, |
| "grad_norm": 0.37588241696357727, |
| "learning_rate": 4.8198443534021856e-05, |
| "loss": 0.0432, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.3346872461413485, |
| "grad_norm": 0.4051400423049927, |
| "learning_rate": 4.8148049940261244e-05, |
| "loss": 0.0454, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.3379366368805849, |
| "grad_norm": 0.42694568634033203, |
| "learning_rate": 4.8096988312782174e-05, |
| "loss": 0.0427, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.3411860276198213, |
| "grad_norm": 0.3985839784145355, |
| "learning_rate": 4.804526012518129e-05, |
| "loss": 0.0429, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.34443541835905767, |
| "grad_norm": 0.34616586565971375, |
| "learning_rate": 4.7992866870291645e-05, |
| "loss": 0.04, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.3476848090982941, |
| "grad_norm": 0.35213449597358704, |
| "learning_rate": 4.7939810060139534e-05, |
| "loss": 0.0435, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.35093419983753044, |
| "grad_norm": 0.3723832368850708, |
| "learning_rate": 4.788609122590096e-05, |
| "loss": 0.046, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.35418359057676685, |
| "grad_norm": 0.3361580967903137, |
| "learning_rate": 4.783171191785737e-05, |
| "loss": 0.0364, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.35743298131600326, |
| "grad_norm": 0.4315849244594574, |
| "learning_rate": 4.777667370535097e-05, |
| "loss": 0.0392, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.3606823720552396, |
| "grad_norm": 0.36335381865501404, |
| "learning_rate": 4.772097817673937e-05, |
| "loss": 0.0375, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.36393176279447603, |
| "grad_norm": 0.3676309883594513, |
| "learning_rate": 4.7664626939349823e-05, |
| "loss": 0.0436, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.36718115353371245, |
| "grad_norm": 0.45313313603401184, |
| "learning_rate": 4.760762161943276e-05, |
| "loss": 0.0407, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.3704305442729488, |
| "grad_norm": 0.3923757076263428, |
| "learning_rate": 4.7549963862114934e-05, |
| "loss": 0.0404, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.3736799350121852, |
| "grad_norm": 0.3564886152744293, |
| "learning_rate": 4.7491655331351884e-05, |
| "loss": 0.0417, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.37692932575142163, |
| "grad_norm": 0.39160194993019104, |
| "learning_rate": 4.7432697709879925e-05, |
| "loss": 0.0405, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.380178716490658, |
| "grad_norm": 0.3885195553302765, |
| "learning_rate": 4.737309269916763e-05, |
| "loss": 0.0351, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.3834281072298944, |
| "grad_norm": 0.37907102704048157, |
| "learning_rate": 4.7312842019366684e-05, |
| "loss": 0.0365, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.3866774979691308, |
| "grad_norm": 0.39848607778549194, |
| "learning_rate": 4.725194740926222e-05, |
| "loss": 0.038, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.38992688870836717, |
| "grad_norm": 0.46821895241737366, |
| "learning_rate": 4.719041062622272e-05, |
| "loss": 0.0405, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.3931762794476036, |
| "grad_norm": 0.3871254324913025, |
| "learning_rate": 4.712823344614921e-05, |
| "loss": 0.038, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.39642567018683994, |
| "grad_norm": 0.26873621344566345, |
| "learning_rate": 4.706541766342407e-05, |
| "loss": 0.0377, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.39967506092607635, |
| "grad_norm": 1.015006184577942, |
| "learning_rate": 4.7001965090859224e-05, |
| "loss": 0.0395, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.40292445166531277, |
| "grad_norm": 0.4097539186477661, |
| "learning_rate": 4.693787755964382e-05, |
| "loss": 0.0305, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.4061738424045491, |
| "grad_norm": 0.3266974091529846, |
| "learning_rate": 4.68731569192914e-05, |
| "loss": 0.034, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.40942323314378554, |
| "grad_norm": 0.36162036657333374, |
| "learning_rate": 4.6807805037586514e-05, |
| "loss": 0.0403, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.41267262388302195, |
| "grad_norm": 0.35943084955215454, |
| "learning_rate": 4.674182380053083e-05, |
| "loss": 0.037, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.4159220146222583, |
| "grad_norm": 0.5160156488418579, |
| "learning_rate": 4.667521511228866e-05, |
| "loss": 0.0364, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.4191714053614947, |
| "grad_norm": 0.3633994162082672, |
| "learning_rate": 4.660798089513209e-05, |
| "loss": 0.0364, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.42242079610073113, |
| "grad_norm": 0.42284584045410156, |
| "learning_rate": 4.654012308938542e-05, |
| "loss": 0.0363, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.4256701868399675, |
| "grad_norm": 0.3621046245098114, |
| "learning_rate": 4.6471643653369226e-05, |
| "loss": 0.0309, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.4289195775792039, |
| "grad_norm": 0.42647165060043335, |
| "learning_rate": 4.64025445633438e-05, |
| "loss": 0.0368, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.4321689683184403, |
| "grad_norm": 0.5478382706642151, |
| "learning_rate": 4.6332827813452175e-05, |
| "loss": 0.0362, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.4354183590576767, |
| "grad_norm": 0.3689424395561218, |
| "learning_rate": 4.62624954156625e-05, |
| "loss": 0.0361, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.4386677497969131, |
| "grad_norm": 0.3740200698375702, |
| "learning_rate": 4.619154939971006e-05, |
| "loss": 0.0369, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.44191714053614944, |
| "grad_norm": 0.3798083961009979, |
| "learning_rate": 4.611999181303861e-05, |
| "loss": 0.0353, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.44516653127538586, |
| "grad_norm": 0.3334667384624481, |
| "learning_rate": 4.6047824720741374e-05, |
| "loss": 0.0371, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.44841592201462227, |
| "grad_norm": 0.4219120740890503, |
| "learning_rate": 4.597505020550138e-05, |
| "loss": 0.036, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.4516653127538586, |
| "grad_norm": 0.24345648288726807, |
| "learning_rate": 4.590167036753141e-05, |
| "loss": 0.0307, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.45491470349309504, |
| "grad_norm": 0.31113502383232117, |
| "learning_rate": 4.582768732451334e-05, |
| "loss": 0.035, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.45816409423233145, |
| "grad_norm": 0.3489152193069458, |
| "learning_rate": 4.575310321153706e-05, |
| "loss": 0.0384, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.4614134849715678, |
| "grad_norm": 0.3709469139575958, |
| "learning_rate": 4.567792018103884e-05, |
| "loss": 0.0374, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.4646628757108042, |
| "grad_norm": 0.40091830492019653, |
| "learning_rate": 4.560214040273922e-05, |
| "loss": 0.036, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.46791226645004064, |
| "grad_norm": 0.4365979731082916, |
| "learning_rate": 4.55257660635804e-05, |
| "loss": 0.0353, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.471161657189277, |
| "grad_norm": 0.3686366677284241, |
| "learning_rate": 4.5448799367663096e-05, |
| "loss": 0.0354, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.4744110479285134, |
| "grad_norm": 0.33025604486465454, |
| "learning_rate": 4.537124253618298e-05, |
| "loss": 0.0345, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.4776604386677498, |
| "grad_norm": 0.4022039771080017, |
| "learning_rate": 4.529309780736654e-05, |
| "loss": 0.0367, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.4809098294069862, |
| "grad_norm": 0.3877394497394562, |
| "learning_rate": 4.521436743640648e-05, |
| "loss": 0.0342, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.4841592201462226, |
| "grad_norm": 0.4671391546726227, |
| "learning_rate": 4.51350536953967e-05, |
| "loss": 0.0337, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.487408610885459, |
| "grad_norm": 0.40457358956336975, |
| "learning_rate": 4.505515887326664e-05, |
| "loss": 0.0302, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.49065800162469536, |
| "grad_norm": 0.3942878544330597, |
| "learning_rate": 4.497468527571529e-05, |
| "loss": 0.0315, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.4939073923639318, |
| "grad_norm": 0.4007840156555176, |
| "learning_rate": 4.4893635225144606e-05, |
| "loss": 0.0301, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.4968318440292445, |
| "eval_loss": 0.15474164485931396, |
| "eval_runtime": 733.5407, |
| "eval_samples_per_second": 3.817, |
| "eval_steps_per_second": 3.817, |
| "step": 1529 |
| }, |
| { |
| "epoch": 0.49715678310316813, |
| "grad_norm": 0.4904119372367859, |
| "learning_rate": 4.481201106059251e-05, |
| "loss": 0.0326, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.5004061738424046, |
| "grad_norm": 0.41244634985923767, |
| "learning_rate": 4.472981513766538e-05, |
| "loss": 0.0296, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.503655564581641, |
| "grad_norm": 0.4068795442581177, |
| "learning_rate": 4.464704982847008e-05, |
| "loss": 0.0345, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.5069049553208773, |
| "grad_norm": 0.3623734712600708, |
| "learning_rate": 4.456371752154549e-05, |
| "loss": 0.0309, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.5101543460601138, |
| "grad_norm": 0.3473750054836273, |
| "learning_rate": 4.447982062179358e-05, |
| "loss": 0.0313, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.5134037367993501, |
| "grad_norm": 0.4023807644844055, |
| "learning_rate": 4.439536155041e-05, |
| "loss": 0.0328, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.5166531275385865, |
| "grad_norm": 0.3513451814651489, |
| "learning_rate": 4.4310342744814214e-05, |
| "loss": 0.0343, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.5199025182778229, |
| "grad_norm": 0.37276962399482727, |
| "learning_rate": 4.4224766658579166e-05, |
| "loss": 0.031, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.5231519090170593, |
| "grad_norm": 0.456547349691391, |
| "learning_rate": 4.413863576136044e-05, |
| "loss": 0.0317, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.5264012997562957, |
| "grad_norm": 0.3823520541191101, |
| "learning_rate": 4.4051952538825034e-05, |
| "loss": 0.0302, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.529650690495532, |
| "grad_norm": 0.42634570598602295, |
| "learning_rate": 4.3964719492579584e-05, |
| "loss": 0.0288, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.5329000812347685, |
| "grad_norm": 0.33916255831718445, |
| "learning_rate": 4.387693914009819e-05, |
| "loss": 0.0298, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.5361494719740049, |
| "grad_norm": 0.441773921251297, |
| "learning_rate": 4.3788614014649775e-05, |
| "loss": 0.033, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.5393988627132412, |
| "grad_norm": 0.35982316732406616, |
| "learning_rate": 4.3699746665224945e-05, |
| "loss": 0.0267, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.5426482534524777, |
| "grad_norm": 0.3963621258735657, |
| "learning_rate": 4.3610339656462445e-05, |
| "loss": 0.0308, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.545897644191714, |
| "grad_norm": 0.4419509172439575, |
| "learning_rate": 4.352039556857516e-05, |
| "loss": 0.0325, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.5491470349309504, |
| "grad_norm": 0.4380287826061249, |
| "learning_rate": 4.3429916997275626e-05, |
| "loss": 0.0297, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.5523964256701869, |
| "grad_norm": 0.27853381633758545, |
| "learning_rate": 4.333890655370113e-05, |
| "loss": 0.0314, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.5556458164094232, |
| "grad_norm": 0.3319164514541626, |
| "learning_rate": 4.324736686433837e-05, |
| "loss": 0.0294, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.5588952071486596, |
| "grad_norm": 0.5052310824394226, |
| "learning_rate": 4.315530057094762e-05, |
| "loss": 0.0314, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.5621445978878961, |
| "grad_norm": 0.2669266164302826, |
| "learning_rate": 4.306271033048655e-05, |
| "loss": 0.0305, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.5653939886271324, |
| "grad_norm": 0.3192387819290161, |
| "learning_rate": 4.2969598815033476e-05, |
| "loss": 0.0274, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.5686433793663688, |
| "grad_norm": 0.5369754433631897, |
| "learning_rate": 4.2875968711710286e-05, |
| "loss": 0.032, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.5718927701056052, |
| "grad_norm": 0.2641260623931885, |
| "learning_rate": 4.2781822722604916e-05, |
| "loss": 0.0272, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.5751421608448416, |
| "grad_norm": 0.4835808575153351, |
| "learning_rate": 4.268716356469331e-05, |
| "loss": 0.0286, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.578391551584078, |
| "grad_norm": 0.27514582872390747, |
| "learning_rate": 4.259199396976107e-05, |
| "loss": 0.0269, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.5816409423233144, |
| "grad_norm": 0.3719632625579834, |
| "learning_rate": 4.2496316684324585e-05, |
| "loss": 0.029, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.5848903330625508, |
| "grad_norm": 0.36428073048591614, |
| "learning_rate": 4.2400134469551746e-05, |
| "loss": 0.0304, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.5881397238017871, |
| "grad_norm": 0.6899104714393616, |
| "learning_rate": 4.230345010118233e-05, |
| "loss": 0.0296, |
| "step": 1810 |
| }, |
| { |
| "epoch": 0.5913891145410236, |
| "grad_norm": 0.3395729660987854, |
| "learning_rate": 4.220626636944783e-05, |
| "loss": 0.025, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.59463850528026, |
| "grad_norm": 0.38969168066978455, |
| "learning_rate": 4.2108586078990966e-05, |
| "loss": 0.0248, |
| "step": 1830 |
| }, |
| { |
| "epoch": 0.5978878960194963, |
| "grad_norm": 0.3905599117279053, |
| "learning_rate": 4.2010412048784733e-05, |
| "loss": 0.0257, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.6011372867587328, |
| "grad_norm": 0.3760197162628174, |
| "learning_rate": 4.191174711205105e-05, |
| "loss": 0.0278, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.6043866774979691, |
| "grad_norm": 0.4263547956943512, |
| "learning_rate": 4.181259411617898e-05, |
| "loss": 0.0248, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.6076360682372055, |
| "grad_norm": 0.46628740429878235, |
| "learning_rate": 4.1712955922642614e-05, |
| "loss": 0.0274, |
| "step": 1870 |
| }, |
| { |
| "epoch": 0.6108854589764419, |
| "grad_norm": 0.46346044540405273, |
| "learning_rate": 4.161283540691841e-05, |
| "loss": 0.0248, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.6141348497156783, |
| "grad_norm": 0.37473928928375244, |
| "learning_rate": 4.151223545840225e-05, |
| "loss": 0.0272, |
| "step": 1890 |
| }, |
| { |
| "epoch": 0.6173842404549147, |
| "grad_norm": 0.4056950807571411, |
| "learning_rate": 4.141115898032607e-05, |
| "loss": 0.024, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.620633631194151, |
| "grad_norm": 0.4770098626613617, |
| "learning_rate": 4.130960888967405e-05, |
| "loss": 0.0237, |
| "step": 1910 |
| }, |
| { |
| "epoch": 0.6238830219333875, |
| "grad_norm": 0.41782036423683167, |
| "learning_rate": 4.1207588117098445e-05, |
| "loss": 0.0272, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.6271324126726239, |
| "grad_norm": 0.4040960669517517, |
| "learning_rate": 4.1105099606835e-05, |
| "loss": 0.0256, |
| "step": 1930 |
| }, |
| { |
| "epoch": 0.6303818034118602, |
| "grad_norm": 0.3390992283821106, |
| "learning_rate": 4.1002146316617986e-05, |
| "loss": 0.0276, |
| "step": 1940 |
| }, |
| { |
| "epoch": 0.6336311941510967, |
| "grad_norm": 0.4464505910873413, |
| "learning_rate": 4.0898731217594836e-05, |
| "loss": 0.0265, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.6368805848903331, |
| "grad_norm": 0.46650540828704834, |
| "learning_rate": 4.0794857294240415e-05, |
| "loss": 0.0229, |
| "step": 1960 |
| }, |
| { |
| "epoch": 0.6401299756295694, |
| "grad_norm": 0.39290061593055725, |
| "learning_rate": 4.0690527544270886e-05, |
| "loss": 0.0207, |
| "step": 1970 |
| }, |
| { |
| "epoch": 0.6433793663688059, |
| "grad_norm": 0.38553354144096375, |
| "learning_rate": 4.0585744978557174e-05, |
| "loss": 0.0267, |
| "step": 1980 |
| }, |
| { |
| "epoch": 0.6466287571080422, |
| "grad_norm": 0.456086128950119, |
| "learning_rate": 4.048051262103811e-05, |
| "loss": 0.0264, |
| "step": 1990 |
| }, |
| { |
| "epoch": 0.6498781478472786, |
| "grad_norm": 0.5561078786849976, |
| "learning_rate": 4.0374833508633156e-05, |
| "loss": 0.0218, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.6531275385865151, |
| "grad_norm": 0.3495825231075287, |
| "learning_rate": 4.0268710691154724e-05, |
| "loss": 0.0226, |
| "step": 2010 |
| }, |
| { |
| "epoch": 0.6563769293257514, |
| "grad_norm": 0.4553760588169098, |
| "learning_rate": 4.0162147231220216e-05, |
| "loss": 0.024, |
| "step": 2020 |
| }, |
| { |
| "epoch": 0.6596263200649878, |
| "grad_norm": 0.40624141693115234, |
| "learning_rate": 4.0055146204163605e-05, |
| "loss": 0.022, |
| "step": 2030 |
| }, |
| { |
| "epoch": 0.6628757108042242, |
| "grad_norm": 0.37093663215637207, |
| "learning_rate": 3.994771069794668e-05, |
| "loss": 0.0241, |
| "step": 2040 |
| }, |
| { |
| "epoch": 0.6661251015434606, |
| "grad_norm": 0.5362465977668762, |
| "learning_rate": 3.9839843813069984e-05, |
| "loss": 0.0246, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.669374492282697, |
| "grad_norm": 0.4092622995376587, |
| "learning_rate": 3.9731548662483234e-05, |
| "loss": 0.0241, |
| "step": 2060 |
| }, |
| { |
| "epoch": 0.6726238830219334, |
| "grad_norm": 0.41743412613868713, |
| "learning_rate": 3.962282837149558e-05, |
| "loss": 0.0232, |
| "step": 2070 |
| }, |
| { |
| "epoch": 0.6758732737611698, |
| "grad_norm": 0.5507500171661377, |
| "learning_rate": 3.951368607768537e-05, |
| "loss": 0.0223, |
| "step": 2080 |
| }, |
| { |
| "epoch": 0.6791226645004061, |
| "grad_norm": 0.45379722118377686, |
| "learning_rate": 3.9404124930809625e-05, |
| "loss": 0.0242, |
| "step": 2090 |
| }, |
| { |
| "epoch": 0.6823720552396426, |
| "grad_norm": 0.36823663115501404, |
| "learning_rate": 3.929414809271308e-05, |
| "loss": 0.0265, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.685621445978879, |
| "grad_norm": 0.3751804530620575, |
| "learning_rate": 3.918375873723701e-05, |
| "loss": 0.0245, |
| "step": 2110 |
| }, |
| { |
| "epoch": 0.6888708367181153, |
| "grad_norm": 0.4553548991680145, |
| "learning_rate": 3.907296005012758e-05, |
| "loss": 0.0257, |
| "step": 2120 |
| }, |
| { |
| "epoch": 0.6921202274573518, |
| "grad_norm": 0.3335023820400238, |
| "learning_rate": 3.896175522894395e-05, |
| "loss": 0.0226, |
| "step": 2130 |
| }, |
| { |
| "epoch": 0.6953696181965882, |
| "grad_norm": 0.4702470600605011, |
| "learning_rate": 3.8850147482965973e-05, |
| "loss": 0.0218, |
| "step": 2140 |
| }, |
| { |
| "epoch": 0.6986190089358245, |
| "grad_norm": 0.39953991770744324, |
| "learning_rate": 3.873814003310158e-05, |
| "loss": 0.0194, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.7018683996750609, |
| "grad_norm": 0.5067981481552124, |
| "learning_rate": 3.862573611179381e-05, |
| "loss": 0.022, |
| "step": 2160 |
| }, |
| { |
| "epoch": 0.7051177904142973, |
| "grad_norm": 0.30548062920570374, |
| "learning_rate": 3.851293896292756e-05, |
| "loss": 0.0171, |
| "step": 2170 |
| }, |
| { |
| "epoch": 0.7083671811535337, |
| "grad_norm": 0.39521142840385437, |
| "learning_rate": 3.839975184173596e-05, |
| "loss": 0.0192, |
| "step": 2180 |
| }, |
| { |
| "epoch": 0.7116165718927701, |
| "grad_norm": 0.36969834566116333, |
| "learning_rate": 3.8286178014706395e-05, |
| "loss": 0.0245, |
| "step": 2190 |
| }, |
| { |
| "epoch": 0.7148659626320065, |
| "grad_norm": 0.4855635166168213, |
| "learning_rate": 3.8172220759486287e-05, |
| "loss": 0.0229, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.7181153533712429, |
| "grad_norm": 0.45929041504859924, |
| "learning_rate": 3.8057883364788475e-05, |
| "loss": 0.0186, |
| "step": 2210 |
| }, |
| { |
| "epoch": 0.7213647441104792, |
| "grad_norm": 0.2941083610057831, |
| "learning_rate": 3.7943169130296295e-05, |
| "loss": 0.0188, |
| "step": 2220 |
| }, |
| { |
| "epoch": 0.7246141348497157, |
| "grad_norm": 0.3690025806427002, |
| "learning_rate": 3.782808136656839e-05, |
| "loss": 0.0188, |
| "step": 2230 |
| }, |
| { |
| "epoch": 0.7278635255889521, |
| "grad_norm": 0.38714373111724854, |
| "learning_rate": 3.771262339494314e-05, |
| "loss": 0.0191, |
| "step": 2240 |
| }, |
| { |
| "epoch": 0.7311129163281884, |
| "grad_norm": 0.40680810809135437, |
| "learning_rate": 3.759679854744282e-05, |
| "loss": 0.0197, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.7343623070674249, |
| "grad_norm": 0.41902831196784973, |
| "learning_rate": 3.748061016667745e-05, |
| "loss": 0.0205, |
| "step": 2260 |
| }, |
| { |
| "epoch": 0.7376116978066612, |
| "grad_norm": 0.4369294047355652, |
| "learning_rate": 3.736406160574833e-05, |
| "loss": 0.019, |
| "step": 2270 |
| }, |
| { |
| "epoch": 0.7408610885458976, |
| "grad_norm": 0.3856930732727051, |
| "learning_rate": 3.724715622815122e-05, |
| "loss": 0.022, |
| "step": 2280 |
| }, |
| { |
| "epoch": 0.7441104792851341, |
| "grad_norm": 0.34679755568504333, |
| "learning_rate": 3.712989740767938e-05, |
| "loss": 0.0164, |
| "step": 2290 |
| }, |
| { |
| "epoch": 0.7473598700243704, |
| "grad_norm": 0.3927323818206787, |
| "learning_rate": 3.7012288528326086e-05, |
| "loss": 0.0181, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.7506092607636068, |
| "grad_norm": 0.4021192491054535, |
| "learning_rate": 3.689433298418706e-05, |
| "loss": 0.0159, |
| "step": 2310 |
| }, |
| { |
| "epoch": 0.7538586515028433, |
| "grad_norm": 0.48003751039505005, |
| "learning_rate": 3.6776034179362474e-05, |
| "loss": 0.0177, |
| "step": 2320 |
| }, |
| { |
| "epoch": 0.7571080422420796, |
| "grad_norm": 0.3487580716609955, |
| "learning_rate": 3.66573955278587e-05, |
| "loss": 0.0162, |
| "step": 2330 |
| }, |
| { |
| "epoch": 0.760357432981316, |
| "grad_norm": 0.47422289848327637, |
| "learning_rate": 3.653842045348985e-05, |
| "loss": 0.018, |
| "step": 2340 |
| }, |
| { |
| "epoch": 0.7636068237205524, |
| "grad_norm": 0.38853368163108826, |
| "learning_rate": 3.64191123897789e-05, |
| "loss": 0.0229, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.7668562144597888, |
| "grad_norm": 0.36860230565071106, |
| "learning_rate": 3.62994747798586e-05, |
| "loss": 0.018, |
| "step": 2360 |
| }, |
| { |
| "epoch": 0.7701056051990252, |
| "grad_norm": 0.4562481939792633, |
| "learning_rate": 3.617951107637219e-05, |
| "loss": 0.0191, |
| "step": 2370 |
| }, |
| { |
| "epoch": 0.7733549959382616, |
| "grad_norm": 0.708402156829834, |
| "learning_rate": 3.605922474137366e-05, |
| "loss": 0.019, |
| "step": 2380 |
| }, |
| { |
| "epoch": 0.776604386677498, |
| "grad_norm": 0.48525258898735046, |
| "learning_rate": 3.5938619246227884e-05, |
| "loss": 0.0217, |
| "step": 2390 |
| }, |
| { |
| "epoch": 0.7798537774167343, |
| "grad_norm": 0.3320712447166443, |
| "learning_rate": 3.581769807151044e-05, |
| "loss": 0.0195, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.7831031681559708, |
| "grad_norm": 0.36696651577949524, |
| "learning_rate": 3.56964647069072e-05, |
| "loss": 0.0192, |
| "step": 2410 |
| }, |
| { |
| "epoch": 0.7863525588952072, |
| "grad_norm": 1.0005451440811157, |
| "learning_rate": 3.55749226511135e-05, |
| "loss": 0.0196, |
| "step": 2420 |
| }, |
| { |
| "epoch": 0.7896019496344435, |
| "grad_norm": 0.45593878626823425, |
| "learning_rate": 3.54530754117333e-05, |
| "loss": 0.0225, |
| "step": 2430 |
| }, |
| { |
| "epoch": 0.7928513403736799, |
| "grad_norm": 0.33774876594543457, |
| "learning_rate": 3.533092650517793e-05, |
| "loss": 0.0199, |
| "step": 2440 |
| }, |
| { |
| "epoch": 0.7961007311129163, |
| "grad_norm": 0.483853816986084, |
| "learning_rate": 3.5208479456564524e-05, |
| "loss": 0.0206, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.7993501218521527, |
| "grad_norm": 0.36340585350990295, |
| "learning_rate": 3.508573779961441e-05, |
| "loss": 0.0172, |
| "step": 2460 |
| }, |
| { |
| "epoch": 0.8025995125913891, |
| "grad_norm": 0.515352725982666, |
| "learning_rate": 3.4962705076551026e-05, |
| "loss": 0.0151, |
| "step": 2470 |
| }, |
| { |
| "epoch": 0.8058489033306255, |
| "grad_norm": 0.3187580406665802, |
| "learning_rate": 3.483938483799778e-05, |
| "loss": 0.0148, |
| "step": 2480 |
| }, |
| { |
| "epoch": 0.8090982940698619, |
| "grad_norm": 0.4531770646572113, |
| "learning_rate": 3.47157806428755e-05, |
| "loss": 0.0158, |
| "step": 2490 |
| }, |
| { |
| "epoch": 0.8123476848090982, |
| "grad_norm": 0.6547293066978455, |
| "learning_rate": 3.45918960582998e-05, |
| "loss": 0.0144, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.8155970755483347, |
| "grad_norm": 0.24430936574935913, |
| "learning_rate": 3.446773465947809e-05, |
| "loss": 0.0148, |
| "step": 2510 |
| }, |
| { |
| "epoch": 0.8188464662875711, |
| "grad_norm": 0.425484299659729, |
| "learning_rate": 3.4343300029606404e-05, |
| "loss": 0.0192, |
| "step": 2520 |
| }, |
| { |
| "epoch": 0.8220958570268074, |
| "grad_norm": 0.43435633182525635, |
| "learning_rate": 3.4218595759766013e-05, |
| "loss": 0.0192, |
| "step": 2530 |
| }, |
| { |
| "epoch": 0.8253452477660439, |
| "grad_norm": 0.499104380607605, |
| "learning_rate": 3.409362544881977e-05, |
| "loss": 0.0172, |
| "step": 2540 |
| }, |
| { |
| "epoch": 0.8285946385052803, |
| "grad_norm": 0.5690107941627502, |
| "learning_rate": 3.3968392703308264e-05, |
| "loss": 0.017, |
| "step": 2550 |
| }, |
| { |
| "epoch": 0.8318440292445166, |
| "grad_norm": 0.4789363145828247, |
| "learning_rate": 3.3842901137345725e-05, |
| "loss": 0.0151, |
| "step": 2560 |
| }, |
| { |
| "epoch": 0.8350934199837531, |
| "grad_norm": 0.34053072333335876, |
| "learning_rate": 3.3717154372515716e-05, |
| "loss": 0.0155, |
| "step": 2570 |
| }, |
| { |
| "epoch": 0.8383428107229894, |
| "grad_norm": 0.44401687383651733, |
| "learning_rate": 3.3591156037766655e-05, |
| "loss": 0.0138, |
| "step": 2580 |
| }, |
| { |
| "epoch": 0.8415922014622258, |
| "grad_norm": 0.41682168841362, |
| "learning_rate": 3.346490976930704e-05, |
| "loss": 0.014, |
| "step": 2590 |
| }, |
| { |
| "epoch": 0.8448415922014623, |
| "grad_norm": 0.6371095776557922, |
| "learning_rate": 3.333841921050053e-05, |
| "loss": 0.0176, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.8480909829406986, |
| "grad_norm": 0.22434203326702118, |
| "learning_rate": 3.3211688011760835e-05, |
| "loss": 0.0123, |
| "step": 2610 |
| }, |
| { |
| "epoch": 0.851340373679935, |
| "grad_norm": 0.5385378003120422, |
| "learning_rate": 3.30847198304463e-05, |
| "loss": 0.016, |
| "step": 2620 |
| }, |
| { |
| "epoch": 0.8545897644191714, |
| "grad_norm": 0.35033077001571655, |
| "learning_rate": 3.2957518330754406e-05, |
| "loss": 0.0149, |
| "step": 2630 |
| }, |
| { |
| "epoch": 0.8578391551584078, |
| "grad_norm": 0.3793995976448059, |
| "learning_rate": 3.2830087183616015e-05, |
| "loss": 0.0153, |
| "step": 2640 |
| }, |
| { |
| "epoch": 0.8610885458976442, |
| "grad_norm": 0.6376614570617676, |
| "learning_rate": 3.270243006658942e-05, |
| "loss": 0.0154, |
| "step": 2650 |
| }, |
| { |
| "epoch": 0.8643379366368806, |
| "grad_norm": 0.3490144908428192, |
| "learning_rate": 3.257455066375423e-05, |
| "loss": 0.0154, |
| "step": 2660 |
| }, |
| { |
| "epoch": 0.867587327376117, |
| "grad_norm": 0.40602752566337585, |
| "learning_rate": 3.244645266560501e-05, |
| "loss": 0.0136, |
| "step": 2670 |
| }, |
| { |
| "epoch": 0.8708367181153533, |
| "grad_norm": 0.37300461530685425, |
| "learning_rate": 3.2318139768944856e-05, |
| "loss": 0.0127, |
| "step": 2680 |
| }, |
| { |
| "epoch": 0.8740861088545898, |
| "grad_norm": 0.3366054594516754, |
| "learning_rate": 3.218961567677861e-05, |
| "loss": 0.0142, |
| "step": 2690 |
| }, |
| { |
| "epoch": 0.8773354995938262, |
| "grad_norm": 0.4088799059391022, |
| "learning_rate": 3.206088409820606e-05, |
| "loss": 0.0143, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.8805848903330625, |
| "grad_norm": 0.3606589734554291, |
| "learning_rate": 3.19319487483149e-05, |
| "loss": 0.0125, |
| "step": 2710 |
| }, |
| { |
| "epoch": 0.8838342810722989, |
| "grad_norm": 0.48760858178138733, |
| "learning_rate": 3.180281334807348e-05, |
| "loss": 0.0121, |
| "step": 2720 |
| }, |
| { |
| "epoch": 0.8870836718115354, |
| "grad_norm": 0.4494096040725708, |
| "learning_rate": 3.1673481624223426e-05, |
| "loss": 0.0123, |
| "step": 2730 |
| }, |
| { |
| "epoch": 0.8903330625507717, |
| "grad_norm": 0.4649289846420288, |
| "learning_rate": 3.154395730917213e-05, |
| "loss": 0.0135, |
| "step": 2740 |
| }, |
| { |
| "epoch": 0.8935824532900081, |
| "grad_norm": 0.3580702841281891, |
| "learning_rate": 3.141424414088499e-05, |
| "loss": 0.014, |
| "step": 2750 |
| }, |
| { |
| "epoch": 0.8968318440292445, |
| "grad_norm": 0.2921467423439026, |
| "learning_rate": 3.128434586277757e-05, |
| "loss": 0.0146, |
| "step": 2760 |
| }, |
| { |
| "epoch": 0.9000812347684809, |
| "grad_norm": 0.5378055572509766, |
| "learning_rate": 3.115426622360752e-05, |
| "loss": 0.0131, |
| "step": 2770 |
| }, |
| { |
| "epoch": 0.9033306255077173, |
| "grad_norm": 0.34166646003723145, |
| "learning_rate": 3.102400897736645e-05, |
| "loss": 0.0123, |
| "step": 2780 |
| }, |
| { |
| "epoch": 0.9065800162469537, |
| "grad_norm": 0.43183135986328125, |
| "learning_rate": 3.0893577883171556e-05, |
| "loss": 0.0151, |
| "step": 2790 |
| }, |
| { |
| "epoch": 0.9098294069861901, |
| "grad_norm": 0.6324542760848999, |
| "learning_rate": 3.076297670515713e-05, |
| "loss": 0.0128, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.9130787977254264, |
| "grad_norm": 0.43282851576805115, |
| "learning_rate": 3.063220921236598e-05, |
| "loss": 0.0129, |
| "step": 2810 |
| }, |
| { |
| "epoch": 0.9163281884646629, |
| "grad_norm": 0.2942393124103546, |
| "learning_rate": 3.0501279178640575e-05, |
| "loss": 0.0131, |
| "step": 2820 |
| }, |
| { |
| "epoch": 0.9195775792038993, |
| "grad_norm": 0.32284924387931824, |
| "learning_rate": 3.0370190382514213e-05, |
| "loss": 0.0103, |
| "step": 2830 |
| }, |
| { |
| "epoch": 0.9228269699431356, |
| "grad_norm": 0.38951289653778076, |
| "learning_rate": 3.0238946607101936e-05, |
| "loss": 0.0105, |
| "step": 2840 |
| }, |
| { |
| "epoch": 0.9260763606823721, |
| "grad_norm": 0.407099187374115, |
| "learning_rate": 3.0107551639991365e-05, |
| "loss": 0.0109, |
| "step": 2850 |
| }, |
| { |
| "epoch": 0.9293257514216084, |
| "grad_norm": 0.5025432705879211, |
| "learning_rate": 2.997600927313338e-05, |
| "loss": 0.0115, |
| "step": 2860 |
| }, |
| { |
| "epoch": 0.9325751421608448, |
| "grad_norm": 0.18864522874355316, |
| "learning_rate": 2.98443233027327e-05, |
| "loss": 0.011, |
| "step": 2870 |
| }, |
| { |
| "epoch": 0.9358245329000813, |
| "grad_norm": 0.3602016866207123, |
| "learning_rate": 2.971249752913834e-05, |
| "loss": 0.012, |
| "step": 2880 |
| }, |
| { |
| "epoch": 0.9390739236393176, |
| "grad_norm": 0.38901951909065247, |
| "learning_rate": 2.958053575673389e-05, |
| "loss": 0.0113, |
| "step": 2890 |
| }, |
| { |
| "epoch": 0.942323314378554, |
| "grad_norm": 0.3340912163257599, |
| "learning_rate": 2.944844179382778e-05, |
| "loss": 0.0102, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.9455727051177905, |
| "grad_norm": 0.32333633303642273, |
| "learning_rate": 2.931621945254334e-05, |
| "loss": 0.0117, |
| "step": 2910 |
| }, |
| { |
| "epoch": 0.9488220958570268, |
| "grad_norm": 0.45609724521636963, |
| "learning_rate": 2.918387254870879e-05, |
| "loss": 0.0114, |
| "step": 2920 |
| }, |
| { |
| "epoch": 0.9520714865962632, |
| "grad_norm": 0.3951948285102844, |
| "learning_rate": 2.905140490174713e-05, |
| "loss": 0.0099, |
| "step": 2930 |
| }, |
| { |
| "epoch": 0.9553208773354996, |
| "grad_norm": 0.4054865539073944, |
| "learning_rate": 2.8918820334565905e-05, |
| "loss": 0.0118, |
| "step": 2940 |
| }, |
| { |
| "epoch": 0.958570268074736, |
| "grad_norm": 0.40964823961257935, |
| "learning_rate": 2.8786122673446893e-05, |
| "loss": 0.0113, |
| "step": 2950 |
| }, |
| { |
| "epoch": 0.9618196588139724, |
| "grad_norm": 0.37629735469818115, |
| "learning_rate": 2.865331574793564e-05, |
| "loss": 0.0112, |
| "step": 2960 |
| }, |
| { |
| "epoch": 0.9650690495532088, |
| "grad_norm": 0.3705214262008667, |
| "learning_rate": 2.8520403390731e-05, |
| "loss": 0.0117, |
| "step": 2970 |
| }, |
| { |
| "epoch": 0.9683184402924452, |
| "grad_norm": 0.3900469243526459, |
| "learning_rate": 2.8387389437574495e-05, |
| "loss": 0.0108, |
| "step": 2980 |
| }, |
| { |
| "epoch": 0.9715678310316815, |
| "grad_norm": 0.2905224561691284, |
| "learning_rate": 2.8254277727139616e-05, |
| "loss": 0.0112, |
| "step": 2990 |
| }, |
| { |
| "epoch": 0.974817221770918, |
| "grad_norm": 0.6466222405433655, |
| "learning_rate": 2.812107210092105e-05, |
| "loss": 0.0124, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.9780666125101544, |
| "grad_norm": 0.5542816519737244, |
| "learning_rate": 2.798777640312381e-05, |
| "loss": 0.0112, |
| "step": 3010 |
| }, |
| { |
| "epoch": 0.9813160032493907, |
| "grad_norm": 0.3827550411224365, |
| "learning_rate": 2.7854394480552327e-05, |
| "loss": 0.0112, |
| "step": 3020 |
| }, |
| { |
| "epoch": 0.9845653939886271, |
| "grad_norm": 0.32786279916763306, |
| "learning_rate": 2.7720930182499367e-05, |
| "loss": 0.0115, |
| "step": 3030 |
| }, |
| { |
| "epoch": 0.9878147847278635, |
| "grad_norm": 0.537501335144043, |
| "learning_rate": 2.7587387360635032e-05, |
| "loss": 0.0113, |
| "step": 3040 |
| }, |
| { |
| "epoch": 0.9910641754670999, |
| "grad_norm": 0.47765976190567017, |
| "learning_rate": 2.7453769868895518e-05, |
| "loss": 0.0141, |
| "step": 3050 |
| }, |
| { |
| "epoch": 0.993663688058489, |
| "eval_loss": 0.1697877049446106, |
| "eval_runtime": 733.8424, |
| "eval_samples_per_second": 3.816, |
| "eval_steps_per_second": 3.816, |
| "step": 3058 |
| }, |
| { |
| "epoch": 0.9943135662063363, |
| "grad_norm": 0.6474146246910095, |
| "learning_rate": 2.7320081563371948e-05, |
| "loss": 0.0085, |
| "step": 3060 |
| }, |
| { |
| "epoch": 0.9975629569455727, |
| "grad_norm": 0.4375881850719452, |
| "learning_rate": 2.718632630219907e-05, |
| "loss": 0.0114, |
| "step": 3070 |
| }, |
| { |
| "epoch": 1.0006498781478472, |
| "grad_norm": 0.327910840511322, |
| "learning_rate": 2.7052507945443927e-05, |
| "loss": 0.009, |
| "step": 3080 |
| }, |
| { |
| "epoch": 1.0038992688870836, |
| "grad_norm": 0.4386765658855438, |
| "learning_rate": 2.6918630354994434e-05, |
| "loss": 0.0081, |
| "step": 3090 |
| }, |
| { |
| "epoch": 1.0071486596263202, |
| "grad_norm": 0.3415273129940033, |
| "learning_rate": 2.6784697394447942e-05, |
| "loss": 0.007, |
| "step": 3100 |
| }, |
| { |
| "epoch": 1.0103980503655565, |
| "grad_norm": 0.31640973687171936, |
| "learning_rate": 2.6650712928999755e-05, |
| "loss": 0.009, |
| "step": 3110 |
| }, |
| { |
| "epoch": 1.0136474411047929, |
| "grad_norm": 0.4809754490852356, |
| "learning_rate": 2.6516680825331548e-05, |
| "loss": 0.0065, |
| "step": 3120 |
| }, |
| { |
| "epoch": 1.0168968318440292, |
| "grad_norm": 0.48411983251571655, |
| "learning_rate": 2.6382604951499802e-05, |
| "loss": 0.0064, |
| "step": 3130 |
| }, |
| { |
| "epoch": 1.0201462225832656, |
| "grad_norm": 0.5264196395874023, |
| "learning_rate": 2.624848917682417e-05, |
| "loss": 0.0076, |
| "step": 3140 |
| }, |
| { |
| "epoch": 1.023395613322502, |
| "grad_norm": 0.5818049311637878, |
| "learning_rate": 2.6114337371775815e-05, |
| "loss": 0.0069, |
| "step": 3150 |
| }, |
| { |
| "epoch": 1.0266450040617385, |
| "grad_norm": 0.20237238705158234, |
| "learning_rate": 2.5980153407865694e-05, |
| "loss": 0.0059, |
| "step": 3160 |
| }, |
| { |
| "epoch": 1.0298943948009749, |
| "grad_norm": 0.4652661681175232, |
| "learning_rate": 2.5845941157532856e-05, |
| "loss": 0.0058, |
| "step": 3170 |
| }, |
| { |
| "epoch": 1.0331437855402112, |
| "grad_norm": 0.42343178391456604, |
| "learning_rate": 2.5711704494032662e-05, |
| "loss": 0.0072, |
| "step": 3180 |
| }, |
| { |
| "epoch": 1.0363931762794476, |
| "grad_norm": 0.31664910912513733, |
| "learning_rate": 2.557744729132503e-05, |
| "loss": 0.0062, |
| "step": 3190 |
| }, |
| { |
| "epoch": 1.039642567018684, |
| "grad_norm": 0.4600023627281189, |
| "learning_rate": 2.5443173423962606e-05, |
| "loss": 0.0065, |
| "step": 3200 |
| }, |
| { |
| "epoch": 1.0428919577579203, |
| "grad_norm": 0.21867072582244873, |
| "learning_rate": 2.5308886766978985e-05, |
| "loss": 0.0075, |
| "step": 3210 |
| }, |
| { |
| "epoch": 1.0461413484971567, |
| "grad_norm": 0.2808971405029297, |
| "learning_rate": 2.517459119577685e-05, |
| "loss": 0.0064, |
| "step": 3220 |
| }, |
| { |
| "epoch": 1.0493907392363933, |
| "grad_norm": 0.3623920679092407, |
| "learning_rate": 2.504029058601612e-05, |
| "loss": 0.0057, |
| "step": 3230 |
| }, |
| { |
| "epoch": 1.0526401299756296, |
| "grad_norm": 0.7856932282447815, |
| "learning_rate": 2.490598881350215e-05, |
| "loss": 0.0069, |
| "step": 3240 |
| }, |
| { |
| "epoch": 1.055889520714866, |
| "grad_norm": 0.3538922965526581, |
| "learning_rate": 2.4771689754073858e-05, |
| "loss": 0.0067, |
| "step": 3250 |
| }, |
| { |
| "epoch": 1.0591389114541023, |
| "grad_norm": 0.40585437417030334, |
| "learning_rate": 2.4637397283491828e-05, |
| "loss": 0.0068, |
| "step": 3260 |
| }, |
| { |
| "epoch": 1.0623883021933387, |
| "grad_norm": 0.3392549753189087, |
| "learning_rate": 2.450311527732653e-05, |
| "loss": 0.0075, |
| "step": 3270 |
| }, |
| { |
| "epoch": 1.065637692932575, |
| "grad_norm": 0.4685852527618408, |
| "learning_rate": 2.436884761084642e-05, |
| "loss": 0.0076, |
| "step": 3280 |
| }, |
| { |
| "epoch": 1.0688870836718116, |
| "grad_norm": 0.2836906611919403, |
| "learning_rate": 2.423459815890614e-05, |
| "loss": 0.0063, |
| "step": 3290 |
| }, |
| { |
| "epoch": 1.072136474411048, |
| "grad_norm": 0.3446894586086273, |
| "learning_rate": 2.4100370795834652e-05, |
| "loss": 0.0056, |
| "step": 3300 |
| }, |
| { |
| "epoch": 1.0753858651502843, |
| "grad_norm": 0.6391892433166504, |
| "learning_rate": 2.3966169395323466e-05, |
| "loss": 0.007, |
| "step": 3310 |
| }, |
| { |
| "epoch": 1.0786352558895207, |
| "grad_norm": 0.4981069564819336, |
| "learning_rate": 2.383199783031484e-05, |
| "loss": 0.0069, |
| "step": 3320 |
| }, |
| { |
| "epoch": 1.081884646628757, |
| "grad_norm": 0.3438924252986908, |
| "learning_rate": 2.369785997288998e-05, |
| "loss": 0.0062, |
| "step": 3330 |
| }, |
| { |
| "epoch": 1.0851340373679934, |
| "grad_norm": 0.6114248633384705, |
| "learning_rate": 2.356375969415735e-05, |
| "loss": 0.006, |
| "step": 3340 |
| }, |
| { |
| "epoch": 1.08838342810723, |
| "grad_norm": 0.658473551273346, |
| "learning_rate": 2.3429700864140892e-05, |
| "loss": 0.0057, |
| "step": 3350 |
| }, |
| { |
| "epoch": 1.0916328188464663, |
| "grad_norm": 0.2530268430709839, |
| "learning_rate": 2.3295687351668407e-05, |
| "loss": 0.0057, |
| "step": 3360 |
| }, |
| { |
| "epoch": 1.0948822095857027, |
| "grad_norm": 0.33400505781173706, |
| "learning_rate": 2.3161723024259832e-05, |
| "loss": 0.0058, |
| "step": 3370 |
| }, |
| { |
| "epoch": 1.098131600324939, |
| "grad_norm": 0.47318515181541443, |
| "learning_rate": 2.302781174801569e-05, |
| "loss": 0.0058, |
| "step": 3380 |
| }, |
| { |
| "epoch": 1.1013809910641754, |
| "grad_norm": 0.18566595017910004, |
| "learning_rate": 2.2893957387505488e-05, |
| "loss": 0.0048, |
| "step": 3390 |
| }, |
| { |
| "epoch": 1.1046303818034118, |
| "grad_norm": 0.43583136796951294, |
| "learning_rate": 2.2760163805656172e-05, |
| "loss": 0.0062, |
| "step": 3400 |
| }, |
| { |
| "epoch": 1.1078797725426484, |
| "grad_norm": 0.09334202855825424, |
| "learning_rate": 2.262643486364069e-05, |
| "loss": 0.0054, |
| "step": 3410 |
| }, |
| { |
| "epoch": 1.1111291632818847, |
| "grad_norm": 0.4679158627986908, |
| "learning_rate": 2.2492774420766518e-05, |
| "loss": 0.0058, |
| "step": 3420 |
| }, |
| { |
| "epoch": 1.114378554021121, |
| "grad_norm": 0.2904549539089203, |
| "learning_rate": 2.2359186334364314e-05, |
| "loss": 0.0062, |
| "step": 3430 |
| }, |
| { |
| "epoch": 1.1176279447603574, |
| "grad_norm": 0.2544921338558197, |
| "learning_rate": 2.22256744596766e-05, |
| "loss": 0.0054, |
| "step": 3440 |
| }, |
| { |
| "epoch": 1.1208773354995938, |
| "grad_norm": 0.3968259394168854, |
| "learning_rate": 2.2092242649746468e-05, |
| "loss": 0.007, |
| "step": 3450 |
| }, |
| { |
| "epoch": 1.1241267262388301, |
| "grad_norm": 0.5143113732337952, |
| "learning_rate": 2.195889475530641e-05, |
| "loss": 0.0063, |
| "step": 3460 |
| }, |
| { |
| "epoch": 1.1273761169780667, |
| "grad_norm": 0.39842620491981506, |
| "learning_rate": 2.1825634624667188e-05, |
| "loss": 0.0043, |
| "step": 3470 |
| }, |
| { |
| "epoch": 1.130625507717303, |
| "grad_norm": 0.7474611401557922, |
| "learning_rate": 2.169246610360679e-05, |
| "loss": 0.0044, |
| "step": 3480 |
| }, |
| { |
| "epoch": 1.1338748984565394, |
| "grad_norm": 0.43382173776626587, |
| "learning_rate": 2.15593930352594e-05, |
| "loss": 0.0068, |
| "step": 3490 |
| }, |
| { |
| "epoch": 1.1371242891957758, |
| "grad_norm": 0.43323034048080444, |
| "learning_rate": 2.1426419260004533e-05, |
| "loss": 0.0066, |
| "step": 3500 |
| }, |
| { |
| "epoch": 1.1403736799350122, |
| "grad_norm": 0.5351451635360718, |
| "learning_rate": 2.1293548615356175e-05, |
| "loss": 0.0059, |
| "step": 3510 |
| }, |
| { |
| "epoch": 1.1436230706742485, |
| "grad_norm": 0.3306446373462677, |
| "learning_rate": 2.1160784935852065e-05, |
| "loss": 0.0063, |
| "step": 3520 |
| }, |
| { |
| "epoch": 1.1468724614134849, |
| "grad_norm": 0.8898324370384216, |
| "learning_rate": 2.1028132052942995e-05, |
| "loss": 0.0059, |
| "step": 3530 |
| }, |
| { |
| "epoch": 1.1501218521527214, |
| "grad_norm": 0.3271152973175049, |
| "learning_rate": 2.0895593794882268e-05, |
| "loss": 0.0055, |
| "step": 3540 |
| }, |
| { |
| "epoch": 1.1533712428919578, |
| "grad_norm": 0.22426745295524597, |
| "learning_rate": 2.0763173986615216e-05, |
| "loss": 0.005, |
| "step": 3550 |
| }, |
| { |
| "epoch": 1.1566206336311942, |
| "grad_norm": 0.42375096678733826, |
| "learning_rate": 2.063087644966879e-05, |
| "loss": 0.0049, |
| "step": 3560 |
| }, |
| { |
| "epoch": 1.1598700243704305, |
| "grad_norm": 0.4661770761013031, |
| "learning_rate": 2.04987050020413e-05, |
| "loss": 0.0048, |
| "step": 3570 |
| }, |
| { |
| "epoch": 1.1631194151096669, |
| "grad_norm": 0.4002689719200134, |
| "learning_rate": 2.0366663458092224e-05, |
| "loss": 0.0045, |
| "step": 3580 |
| }, |
| { |
| "epoch": 1.1663688058489032, |
| "grad_norm": 0.5181555151939392, |
| "learning_rate": 2.0234755628432133e-05, |
| "loss": 0.0043, |
| "step": 3590 |
| }, |
| { |
| "epoch": 1.1696181965881398, |
| "grad_norm": 0.4838791787624359, |
| "learning_rate": 2.0102985319812688e-05, |
| "loss": 0.0059, |
| "step": 3600 |
| }, |
| { |
| "epoch": 1.1728675873273762, |
| "grad_norm": 0.5321421027183533, |
| "learning_rate": 1.9971356335016834e-05, |
| "loss": 0.0062, |
| "step": 3610 |
| }, |
| { |
| "epoch": 1.1761169780666125, |
| "grad_norm": 0.17948143184185028, |
| "learning_rate": 1.9839872472749013e-05, |
| "loss": 0.0046, |
| "step": 3620 |
| }, |
| { |
| "epoch": 1.1793663688058489, |
| "grad_norm": 0.5810254216194153, |
| "learning_rate": 1.9708537527525544e-05, |
| "loss": 0.0051, |
| "step": 3630 |
| }, |
| { |
| "epoch": 1.1826157595450852, |
| "grad_norm": 0.32746565341949463, |
| "learning_rate": 1.957735528956514e-05, |
| "loss": 0.0061, |
| "step": 3640 |
| }, |
| { |
| "epoch": 1.1858651502843216, |
| "grad_norm": 0.359332412481308, |
| "learning_rate": 1.9446329544679488e-05, |
| "loss": 0.0058, |
| "step": 3650 |
| }, |
| { |
| "epoch": 1.189114541023558, |
| "grad_norm": 0.4285711944103241, |
| "learning_rate": 1.9315464074164036e-05, |
| "loss": 0.004, |
| "step": 3660 |
| }, |
| { |
| "epoch": 1.1923639317627945, |
| "grad_norm": 0.42309272289276123, |
| "learning_rate": 1.918476265468882e-05, |
| "loss": 0.0049, |
| "step": 3670 |
| }, |
| { |
| "epoch": 1.195613322502031, |
| "grad_norm": 0.495802640914917, |
| "learning_rate": 1.9054229058189514e-05, |
| "loss": 0.0046, |
| "step": 3680 |
| }, |
| { |
| "epoch": 1.1988627132412673, |
| "grad_norm": 0.39304494857788086, |
| "learning_rate": 1.892386705175856e-05, |
| "loss": 0.0043, |
| "step": 3690 |
| }, |
| { |
| "epoch": 1.2021121039805036, |
| "grad_norm": 0.30474936962127686, |
| "learning_rate": 1.879368039753644e-05, |
| "loss": 0.0042, |
| "step": 3700 |
| }, |
| { |
| "epoch": 1.20536149471974, |
| "grad_norm": 0.19054022431373596, |
| "learning_rate": 1.866367285260312e-05, |
| "loss": 0.0045, |
| "step": 3710 |
| }, |
| { |
| "epoch": 1.2086108854589765, |
| "grad_norm": 0.38206177949905396, |
| "learning_rate": 1.853384816886962e-05, |
| "loss": 0.0056, |
| "step": 3720 |
| }, |
| { |
| "epoch": 1.211860276198213, |
| "grad_norm": 0.34758618474006653, |
| "learning_rate": 1.840421009296975e-05, |
| "loss": 0.0034, |
| "step": 3730 |
| }, |
| { |
| "epoch": 1.2151096669374493, |
| "grad_norm": 0.3336513638496399, |
| "learning_rate": 1.827476236615194e-05, |
| "loss": 0.0051, |
| "step": 3740 |
| }, |
| { |
| "epoch": 1.2183590576766856, |
| "grad_norm": 0.6265475153923035, |
| "learning_rate": 1.8145508724171316e-05, |
| "loss": 0.0032, |
| "step": 3750 |
| }, |
| { |
| "epoch": 1.221608448415922, |
| "grad_norm": 0.4233214259147644, |
| "learning_rate": 1.80164528971819e-05, |
| "loss": 0.0063, |
| "step": 3760 |
| }, |
| { |
| "epoch": 1.2248578391551583, |
| "grad_norm": 0.2057565301656723, |
| "learning_rate": 1.7887598609628897e-05, |
| "loss": 0.0037, |
| "step": 3770 |
| }, |
| { |
| "epoch": 1.2281072298943947, |
| "grad_norm": 0.2737014889717102, |
| "learning_rate": 1.7758949580141276e-05, |
| "loss": 0.0047, |
| "step": 3780 |
| }, |
| { |
| "epoch": 1.2313566206336313, |
| "grad_norm": 0.38340964913368225, |
| "learning_rate": 1.7630509521424407e-05, |
| "loss": 0.0049, |
| "step": 3790 |
| }, |
| { |
| "epoch": 1.2346060113728676, |
| "grad_norm": 0.3404456377029419, |
| "learning_rate": 1.750228214015295e-05, |
| "loss": 0.0034, |
| "step": 3800 |
| }, |
| { |
| "epoch": 1.237855402112104, |
| "grad_norm": 0.40690556168556213, |
| "learning_rate": 1.7374271136863863e-05, |
| "loss": 0.0042, |
| "step": 3810 |
| }, |
| { |
| "epoch": 1.2411047928513403, |
| "grad_norm": 0.22202616930007935, |
| "learning_rate": 1.7246480205849613e-05, |
| "loss": 0.0046, |
| "step": 3820 |
| }, |
| { |
| "epoch": 1.2443541835905767, |
| "grad_norm": 0.37885475158691406, |
| "learning_rate": 1.7118913035051564e-05, |
| "loss": 0.004, |
| "step": 3830 |
| }, |
| { |
| "epoch": 1.2476035743298133, |
| "grad_norm": 0.16762322187423706, |
| "learning_rate": 1.6991573305953533e-05, |
| "loss": 0.0034, |
| "step": 3840 |
| }, |
| { |
| "epoch": 1.2508529650690496, |
| "grad_norm": 0.5190407633781433, |
| "learning_rate": 1.686446469347558e-05, |
| "loss": 0.0042, |
| "step": 3850 |
| }, |
| { |
| "epoch": 1.254102355808286, |
| "grad_norm": 0.290955126285553, |
| "learning_rate": 1.6737590865867907e-05, |
| "loss": 0.0056, |
| "step": 3860 |
| }, |
| { |
| "epoch": 1.2573517465475224, |
| "grad_norm": 0.240234836935997, |
| "learning_rate": 1.6610955484605023e-05, |
| "loss": 0.0034, |
| "step": 3870 |
| }, |
| { |
| "epoch": 1.2606011372867587, |
| "grad_norm": 0.29844263195991516, |
| "learning_rate": 1.6484562204280075e-05, |
| "loss": 0.0038, |
| "step": 3880 |
| }, |
| { |
| "epoch": 1.263850528025995, |
| "grad_norm": 0.3929229974746704, |
| "learning_rate": 1.6358414672499377e-05, |
| "loss": 0.0051, |
| "step": 3890 |
| }, |
| { |
| "epoch": 1.2670999187652314, |
| "grad_norm": 0.3525027632713318, |
| "learning_rate": 1.623251652977713e-05, |
| "loss": 0.0037, |
| "step": 3900 |
| }, |
| { |
| "epoch": 1.2703493095044678, |
| "grad_norm": 0.20320917665958405, |
| "learning_rate": 1.6106871409430387e-05, |
| "loss": 0.0044, |
| "step": 3910 |
| }, |
| { |
| "epoch": 1.2735987002437044, |
| "grad_norm": 0.1730404794216156, |
| "learning_rate": 1.5981482937474172e-05, |
| "loss": 0.0039, |
| "step": 3920 |
| }, |
| { |
| "epoch": 1.2768480909829407, |
| "grad_norm": 0.3508148789405823, |
| "learning_rate": 1.5856354732516865e-05, |
| "loss": 0.0041, |
| "step": 3930 |
| }, |
| { |
| "epoch": 1.280097481722177, |
| "grad_norm": 0.35546788573265076, |
| "learning_rate": 1.573149040565572e-05, |
| "loss": 0.0043, |
| "step": 3940 |
| }, |
| { |
| "epoch": 1.2833468724614134, |
| "grad_norm": 0.2085341513156891, |
| "learning_rate": 1.5606893560372714e-05, |
| "loss": 0.0043, |
| "step": 3950 |
| }, |
| { |
| "epoch": 1.28659626320065, |
| "grad_norm": 0.18922321498394012, |
| "learning_rate": 1.548256779243052e-05, |
| "loss": 0.0037, |
| "step": 3960 |
| }, |
| { |
| "epoch": 1.2898456539398864, |
| "grad_norm": 0.3243728280067444, |
| "learning_rate": 1.5358516689768734e-05, |
| "loss": 0.0044, |
| "step": 3970 |
| }, |
| { |
| "epoch": 1.2930950446791227, |
| "grad_norm": 0.37964773178100586, |
| "learning_rate": 1.5234743832400344e-05, |
| "loss": 0.0031, |
| "step": 3980 |
| }, |
| { |
| "epoch": 1.296344435418359, |
| "grad_norm": 0.3203120529651642, |
| "learning_rate": 1.5111252792308406e-05, |
| "loss": 0.0031, |
| "step": 3990 |
| }, |
| { |
| "epoch": 1.2995938261575954, |
| "grad_norm": 0.1457476019859314, |
| "learning_rate": 1.4988047133342964e-05, |
| "loss": 0.0042, |
| "step": 4000 |
| }, |
| { |
| "epoch": 1.3028432168968318, |
| "grad_norm": 0.14640583097934723, |
| "learning_rate": 1.486513041111819e-05, |
| "loss": 0.003, |
| "step": 4010 |
| }, |
| { |
| "epoch": 1.3060926076360682, |
| "grad_norm": 0.5449049472808838, |
| "learning_rate": 1.4742506172909775e-05, |
| "loss": 0.0033, |
| "step": 4020 |
| }, |
| { |
| "epoch": 1.3093419983753045, |
| "grad_norm": 0.40858981013298035, |
| "learning_rate": 1.4620177957552578e-05, |
| "loss": 0.0038, |
| "step": 4030 |
| }, |
| { |
| "epoch": 1.312591389114541, |
| "grad_norm": 0.5728728175163269, |
| "learning_rate": 1.4498149295338464e-05, |
| "loss": 0.0031, |
| "step": 4040 |
| }, |
| { |
| "epoch": 1.3158407798537775, |
| "grad_norm": 0.24417610466480255, |
| "learning_rate": 1.437642370791446e-05, |
| "loss": 0.0034, |
| "step": 4050 |
| }, |
| { |
| "epoch": 1.3190901705930138, |
| "grad_norm": 0.317682683467865, |
| "learning_rate": 1.4255004708181075e-05, |
| "loss": 0.0028, |
| "step": 4060 |
| }, |
| { |
| "epoch": 1.3223395613322502, |
| "grad_norm": 0.6588785648345947, |
| "learning_rate": 1.4133895800190983e-05, |
| "loss": 0.004, |
| "step": 4070 |
| }, |
| { |
| "epoch": 1.3255889520714865, |
| "grad_norm": 0.44252049922943115, |
| "learning_rate": 1.4013100479047825e-05, |
| "loss": 0.0034, |
| "step": 4080 |
| }, |
| { |
| "epoch": 1.328838342810723, |
| "grad_norm": 0.4248214066028595, |
| "learning_rate": 1.3892622230805436e-05, |
| "loss": 0.005, |
| "step": 4090 |
| }, |
| { |
| "epoch": 1.3320877335499595, |
| "grad_norm": 0.31828951835632324, |
| "learning_rate": 1.3772464532367125e-05, |
| "loss": 0.0026, |
| "step": 4100 |
| }, |
| { |
| "epoch": 1.3353371242891958, |
| "grad_norm": 0.25999122858047485, |
| "learning_rate": 1.3652630851385454e-05, |
| "loss": 0.0035, |
| "step": 4110 |
| }, |
| { |
| "epoch": 1.3385865150284322, |
| "grad_norm": 0.15871325135231018, |
| "learning_rate": 1.353312464616207e-05, |
| "loss": 0.0035, |
| "step": 4120 |
| }, |
| { |
| "epoch": 1.3418359057676685, |
| "grad_norm": 0.44427499175071716, |
| "learning_rate": 1.341394936554794e-05, |
| "loss": 0.0028, |
| "step": 4130 |
| }, |
| { |
| "epoch": 1.345085296506905, |
| "grad_norm": 0.306194543838501, |
| "learning_rate": 1.329510844884385e-05, |
| "loss": 0.0026, |
| "step": 4140 |
| }, |
| { |
| "epoch": 1.3483346872461413, |
| "grad_norm": 0.19543297588825226, |
| "learning_rate": 1.3176605325701086e-05, |
| "loss": 0.0028, |
| "step": 4150 |
| }, |
| { |
| "epoch": 1.3515840779853776, |
| "grad_norm": 0.21941304206848145, |
| "learning_rate": 1.305844341602249e-05, |
| "loss": 0.0018, |
| "step": 4160 |
| }, |
| { |
| "epoch": 1.3548334687246142, |
| "grad_norm": 0.36652112007141113, |
| "learning_rate": 1.2940626129863792e-05, |
| "loss": 0.003, |
| "step": 4170 |
| }, |
| { |
| "epoch": 1.3580828594638505, |
| "grad_norm": 0.47468826174736023, |
| "learning_rate": 1.282315686733514e-05, |
| "loss": 0.0025, |
| "step": 4180 |
| }, |
| { |
| "epoch": 1.361332250203087, |
| "grad_norm": 0.3629470765590668, |
| "learning_rate": 1.2706039018503013e-05, |
| "loss": 0.003, |
| "step": 4190 |
| }, |
| { |
| "epoch": 1.3645816409423233, |
| "grad_norm": 0.3716108500957489, |
| "learning_rate": 1.2589275963292397e-05, |
| "loss": 0.0027, |
| "step": 4200 |
| }, |
| { |
| "epoch": 1.3678310316815598, |
| "grad_norm": 0.8420029878616333, |
| "learning_rate": 1.2472871071389205e-05, |
| "loss": 0.0027, |
| "step": 4210 |
| }, |
| { |
| "epoch": 1.3710804224207962, |
| "grad_norm": 0.43932396173477173, |
| "learning_rate": 1.2356827702143048e-05, |
| "loss": 0.003, |
| "step": 4220 |
| }, |
| { |
| "epoch": 1.3743298131600326, |
| "grad_norm": 0.28611013293266296, |
| "learning_rate": 1.2241149204470314e-05, |
| "loss": 0.0034, |
| "step": 4230 |
| }, |
| { |
| "epoch": 1.377579203899269, |
| "grad_norm": 0.30783408880233765, |
| "learning_rate": 1.2125838916757471e-05, |
| "loss": 0.0034, |
| "step": 4240 |
| }, |
| { |
| "epoch": 1.3808285946385053, |
| "grad_norm": 0.16370636224746704, |
| "learning_rate": 1.2010900166764774e-05, |
| "loss": 0.0028, |
| "step": 4250 |
| }, |
| { |
| "epoch": 1.3840779853777416, |
| "grad_norm": 0.055241186171770096, |
| "learning_rate": 1.1896336271530187e-05, |
| "loss": 0.0035, |
| "step": 4260 |
| }, |
| { |
| "epoch": 1.387327376116978, |
| "grad_norm": 0.42322880029678345, |
| "learning_rate": 1.1782150537273665e-05, |
| "loss": 0.0029, |
| "step": 4270 |
| }, |
| { |
| "epoch": 1.3905767668562143, |
| "grad_norm": 0.12644466757774353, |
| "learning_rate": 1.166834625930178e-05, |
| "loss": 0.0022, |
| "step": 4280 |
| }, |
| { |
| "epoch": 1.393826157595451, |
| "grad_norm": 0.24443909525871277, |
| "learning_rate": 1.1554926721912562e-05, |
| "loss": 0.0022, |
| "step": 4290 |
| }, |
| { |
| "epoch": 1.3970755483346873, |
| "grad_norm": 0.12778101861476898, |
| "learning_rate": 1.144189519830074e-05, |
| "loss": 0.003, |
| "step": 4300 |
| }, |
| { |
| "epoch": 1.4003249390739236, |
| "grad_norm": 0.15853983163833618, |
| "learning_rate": 1.1329254950463315e-05, |
| "loss": 0.0023, |
| "step": 4310 |
| }, |
| { |
| "epoch": 1.40357432981316, |
| "grad_norm": 0.11951529234647751, |
| "learning_rate": 1.1217009229105357e-05, |
| "loss": 0.0031, |
| "step": 4320 |
| }, |
| { |
| "epoch": 1.4068237205523964, |
| "grad_norm": 0.21437332034111023, |
| "learning_rate": 1.1105161273546236e-05, |
| "loss": 0.0019, |
| "step": 4330 |
| }, |
| { |
| "epoch": 1.410073111291633, |
| "grad_norm": 0.8929743766784668, |
| "learning_rate": 1.0993714311626146e-05, |
| "loss": 0.0024, |
| "step": 4340 |
| }, |
| { |
| "epoch": 1.4133225020308693, |
| "grad_norm": 0.2045769989490509, |
| "learning_rate": 1.0882671559612909e-05, |
| "loss": 0.0029, |
| "step": 4350 |
| }, |
| { |
| "epoch": 1.4165718927701056, |
| "grad_norm": 0.08286549896001816, |
| "learning_rate": 1.0772036222109182e-05, |
| "loss": 0.0029, |
| "step": 4360 |
| }, |
| { |
| "epoch": 1.419821283509342, |
| "grad_norm": 0.09282595664262772, |
| "learning_rate": 1.066181149196e-05, |
| "loss": 0.0018, |
| "step": 4370 |
| }, |
| { |
| "epoch": 1.4230706742485784, |
| "grad_norm": 0.2858879566192627, |
| "learning_rate": 1.055200055016057e-05, |
| "loss": 0.0029, |
| "step": 4380 |
| }, |
| { |
| "epoch": 1.4263200649878147, |
| "grad_norm": 0.2344265580177307, |
| "learning_rate": 1.0442606565764534e-05, |
| "loss": 0.0019, |
| "step": 4390 |
| }, |
| { |
| "epoch": 1.429569455727051, |
| "grad_norm": 0.346629798412323, |
| "learning_rate": 1.0333632695792492e-05, |
| "loss": 0.0025, |
| "step": 4400 |
| }, |
| { |
| "epoch": 1.4328188464662874, |
| "grad_norm": 0.29290953278541565, |
| "learning_rate": 1.0225082085140856e-05, |
| "loss": 0.0022, |
| "step": 4410 |
| }, |
| { |
| "epoch": 1.436068237205524, |
| "grad_norm": 0.43641456961631775, |
| "learning_rate": 1.0116957866491128e-05, |
| "loss": 0.0021, |
| "step": 4420 |
| }, |
| { |
| "epoch": 1.4393176279447604, |
| "grad_norm": 0.3562842309474945, |
| "learning_rate": 1.000926316021952e-05, |
| "loss": 0.0022, |
| "step": 4430 |
| }, |
| { |
| "epoch": 1.4425670186839967, |
| "grad_norm": 0.4262392520904541, |
| "learning_rate": 9.902001074306835e-06, |
| "loss": 0.0028, |
| "step": 4440 |
| }, |
| { |
| "epoch": 1.445816409423233, |
| "grad_norm": 0.2457405924797058, |
| "learning_rate": 9.795174704248808e-06, |
| "loss": 0.0025, |
| "step": 4450 |
| }, |
| { |
| "epoch": 1.4490658001624697, |
| "grad_norm": 0.22532138228416443, |
| "learning_rate": 9.6887871329668e-06, |
| "loss": 0.0019, |
| "step": 4460 |
| }, |
| { |
| "epoch": 1.452315190901706, |
| "grad_norm": 0.4915858507156372, |
| "learning_rate": 9.582841430718767e-06, |
| "loss": 0.0023, |
| "step": 4470 |
| }, |
| { |
| "epoch": 1.4555645816409424, |
| "grad_norm": 0.18646268546581268, |
| "learning_rate": 9.477340655010716e-06, |
| "loss": 0.0021, |
| "step": 4480 |
| }, |
| { |
| "epoch": 1.4588139723801787, |
| "grad_norm": 0.10206873714923859, |
| "learning_rate": 9.372287850508421e-06, |
| "loss": 0.0017, |
| "step": 4490 |
| }, |
| { |
| "epoch": 1.462063363119415, |
| "grad_norm": 0.3298508822917938, |
| "learning_rate": 9.267686048949568e-06, |
| "loss": 0.0018, |
| "step": 4500 |
| }, |
| { |
| "epoch": 1.4653127538586515, |
| "grad_norm": 0.5361196398735046, |
| "learning_rate": 9.163538269056296e-06, |
| "loss": 0.003, |
| "step": 4510 |
| }, |
| { |
| "epoch": 1.4685621445978878, |
| "grad_norm": 0.41454723477363586, |
| "learning_rate": 9.05984751644803e-06, |
| "loss": 0.0019, |
| "step": 4520 |
| }, |
| { |
| "epoch": 1.4718115353371242, |
| "grad_norm": 0.47003769874572754, |
| "learning_rate": 8.956616783554759e-06, |
| "loss": 0.0025, |
| "step": 4530 |
| }, |
| { |
| "epoch": 1.4750609260763607, |
| "grad_norm": 0.2163703888654709, |
| "learning_rate": 8.853849049530703e-06, |
| "loss": 0.0018, |
| "step": 4540 |
| }, |
| { |
| "epoch": 1.478310316815597, |
| "grad_norm": 0.08466655015945435, |
| "learning_rate": 8.751547280168297e-06, |
| "loss": 0.0021, |
| "step": 4550 |
| }, |
| { |
| "epoch": 1.4815597075548335, |
| "grad_norm": 0.2865130305290222, |
| "learning_rate": 8.649714427812607e-06, |
| "loss": 0.0017, |
| "step": 4560 |
| }, |
| { |
| "epoch": 1.4848090982940698, |
| "grad_norm": 0.2946512699127197, |
| "learning_rate": 8.548353431276182e-06, |
| "loss": 0.0019, |
| "step": 4570 |
| }, |
| { |
| "epoch": 1.4880584890333062, |
| "grad_norm": 0.4205271303653717, |
| "learning_rate": 8.447467215754157e-06, |
| "loss": 0.0021, |
| "step": 4580 |
| }, |
| { |
| "epoch": 1.4903330625507718, |
| "eval_loss": 0.20176434516906738, |
| "eval_runtime": 733.5854, |
| "eval_samples_per_second": 3.817, |
| "eval_steps_per_second": 3.817, |
| "step": 4587 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 6156, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 1529, |
| "stateful_callbacks": { |
| "EarlyStoppingCallback": { |
| "args": { |
| "early_stopping_patience": 2, |
| "early_stopping_threshold": 0.0 |
| }, |
| "attributes": { |
| "early_stopping_patience_counter": 2 |
| } |
| }, |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 3.5805200990465556e+18, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|