{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 123883, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 8.072132576705439e-05, "grad_norm": 4.204583644866943, "learning_rate": 2.25e-06, "loss": 9.488965606689453, "step": 10 }, { "epoch": 0.00016144265153410879, "grad_norm": 2.082395553588867, "learning_rate": 4.75e-06, "loss": 8.715921020507812, "step": 20 }, { "epoch": 0.0002421639773011632, "grad_norm": 2.545424461364746, "learning_rate": 7.25e-06, "loss": 8.008010101318359, "step": 30 }, { "epoch": 0.00032288530306821757, "grad_norm": 1.4784176349639893, "learning_rate": 9.75e-06, "loss": 7.8124847412109375, "step": 40 }, { "epoch": 0.000403606628835272, "grad_norm": 3.0447421073913574, "learning_rate": 1.2250000000000001e-05, "loss": 7.964997863769531, "step": 50 }, { "epoch": 0.0004843279546023264, "grad_norm": 2.1193063259124756, "learning_rate": 1.4750000000000003e-05, "loss": 7.662926483154297, "step": 60 }, { "epoch": 0.0005650492803693808, "grad_norm": 1.804943561553955, "learning_rate": 1.7250000000000003e-05, "loss": 6.9937286376953125, "step": 70 }, { "epoch": 0.0006457706061364351, "grad_norm": 1.2089874744415283, "learning_rate": 1.9750000000000002e-05, "loss": 7.757846832275391, "step": 80 }, { "epoch": 0.0007264919319034896, "grad_norm": 1.256369948387146, "learning_rate": 1.999854607723561e-05, "loss": 7.261299896240234, "step": 90 }, { "epoch": 0.000807213257670544, "grad_norm": 3.93025279045105, "learning_rate": 1.9996930607497397e-05, "loss": 7.928846740722657, "step": 100 }, { "epoch": 0.0008879345834375984, "grad_norm": 1.3987377882003784, "learning_rate": 1.9995315137759185e-05, "loss": 6.963916778564453, "step": 110 }, { "epoch": 0.0009686559092046528, "grad_norm": 1.2347275018692017, "learning_rate": 1.999369966802097e-05, "loss": 7.051486206054688, "step": 120 }, { "epoch": 0.0010493772349717072, "grad_norm": 2.972804546356201, "learning_rate": 1.9992084198282757e-05, "loss": 7.137745666503906, "step": 130 }, { "epoch": 0.0011300985607387616, "grad_norm": 1.655303955078125, "learning_rate": 1.9990468728544544e-05, "loss": 7.067646026611328, "step": 140 }, { "epoch": 0.001210819886505816, "grad_norm": 1.5094283819198608, "learning_rate": 1.9988853258806332e-05, "loss": 6.408104705810547, "step": 150 }, { "epoch": 0.0012915412122728703, "grad_norm": 2.09006667137146, "learning_rate": 1.9987237789068116e-05, "loss": 6.840589904785157, "step": 160 }, { "epoch": 0.0013722625380399249, "grad_norm": 1.0452735424041748, "learning_rate": 1.9985622319329904e-05, "loss": 6.50140151977539, "step": 170 }, { "epoch": 0.0014529838638069792, "grad_norm": 2.4537065029144287, "learning_rate": 1.998400684959169e-05, "loss": 6.28886604309082, "step": 180 }, { "epoch": 0.0015337051895740336, "grad_norm": 3.2922863960266113, "learning_rate": 1.998239137985348e-05, "loss": 6.35135498046875, "step": 190 }, { "epoch": 0.001614426515341088, "grad_norm": 2.1869544982910156, "learning_rate": 1.9980775910115264e-05, "loss": 6.360417175292969, "step": 200 }, { "epoch": 0.0016951478411081423, "grad_norm": 1.0451886653900146, "learning_rate": 1.997916044037705e-05, "loss": 5.949919509887695, "step": 210 }, { "epoch": 0.0017758691668751968, "grad_norm": 1.5639691352844238, "learning_rate": 1.997754497063884e-05, "loss": 7.244811248779297, "step": 220 }, { "epoch": 0.0018565904926422512, "grad_norm": 2.244600296020508, "learning_rate": 1.9975929500900627e-05, "loss": 6.496703338623047, "step": 230 }, { "epoch": 0.0019373118184093055, "grad_norm": 1.2139713764190674, "learning_rate": 1.9974314031162414e-05, "loss": 6.408896636962891, "step": 240 }, { "epoch": 0.00201803314417636, "grad_norm": 2.249967575073242, "learning_rate": 1.99726985614242e-05, "loss": 6.523319244384766, "step": 250 }, { "epoch": 0.0020987544699434145, "grad_norm": 1.029001235961914, "learning_rate": 1.9971083091685986e-05, "loss": 5.544437026977539, "step": 260 }, { "epoch": 0.0021794757957104686, "grad_norm": 1.1416022777557373, "learning_rate": 1.9969467621947774e-05, "loss": 5.831938552856445, "step": 270 }, { "epoch": 0.002260197121477523, "grad_norm": 1.061051845550537, "learning_rate": 1.9967852152209562e-05, "loss": 6.057173156738282, "step": 280 }, { "epoch": 0.0023409184472445777, "grad_norm": 2.713595390319824, "learning_rate": 1.9966236682471346e-05, "loss": 6.640118408203125, "step": 290 }, { "epoch": 0.002421639773011632, "grad_norm": 1.974496603012085, "learning_rate": 1.9964621212733134e-05, "loss": 6.403076934814453, "step": 300 }, { "epoch": 0.0025023610987786864, "grad_norm": 2.7883596420288086, "learning_rate": 1.996300574299492e-05, "loss": 6.439258575439453, "step": 310 }, { "epoch": 0.0025830824245457406, "grad_norm": 1.5357650518417358, "learning_rate": 1.996139027325671e-05, "loss": 6.155916213989258, "step": 320 }, { "epoch": 0.002663803750312795, "grad_norm": 1.0269571542739868, "learning_rate": 1.9959774803518494e-05, "loss": 5.685990905761718, "step": 330 }, { "epoch": 0.0027445250760798497, "grad_norm": 1.0825433731079102, "learning_rate": 1.995815933378028e-05, "loss": 5.468481826782226, "step": 340 }, { "epoch": 0.002825246401846904, "grad_norm": 2.1695127487182617, "learning_rate": 1.995654386404207e-05, "loss": 5.521472930908203, "step": 350 }, { "epoch": 0.0029059677276139584, "grad_norm": 1.9781842231750488, "learning_rate": 1.9954928394303857e-05, "loss": 5.719722747802734, "step": 360 }, { "epoch": 0.0029866890533810126, "grad_norm": 1.2275731563568115, "learning_rate": 1.995331292456564e-05, "loss": 5.881759262084961, "step": 370 }, { "epoch": 0.003067410379148067, "grad_norm": 0.9331965446472168, "learning_rate": 1.995169745482743e-05, "loss": 5.581706619262695, "step": 380 }, { "epoch": 0.0031481317049151217, "grad_norm": 4.014269828796387, "learning_rate": 1.9950081985089216e-05, "loss": 5.808352279663086, "step": 390 }, { "epoch": 0.003228853030682176, "grad_norm": 3.396928071975708, "learning_rate": 1.9948466515351004e-05, "loss": 6.565855407714844, "step": 400 }, { "epoch": 0.0033095743564492304, "grad_norm": 1.84811532497406, "learning_rate": 1.9946851045612788e-05, "loss": 5.745631408691406, "step": 410 }, { "epoch": 0.0033902956822162845, "grad_norm": 1.7310338020324707, "learning_rate": 1.9945235575874576e-05, "loss": 6.148568344116211, "step": 420 }, { "epoch": 0.003471017007983339, "grad_norm": 2.441864252090454, "learning_rate": 1.9943620106136364e-05, "loss": 6.297210693359375, "step": 430 }, { "epoch": 0.0035517383337503937, "grad_norm": 2.714507818222046, "learning_rate": 1.994200463639815e-05, "loss": 5.2566650390625, "step": 440 }, { "epoch": 0.003632459659517448, "grad_norm": 1.3041070699691772, "learning_rate": 1.9940389166659936e-05, "loss": 5.430858612060547, "step": 450 }, { "epoch": 0.0037131809852845024, "grad_norm": 2.1785035133361816, "learning_rate": 1.9938773696921723e-05, "loss": 6.157112121582031, "step": 460 }, { "epoch": 0.0037939023110515565, "grad_norm": 2.364100933074951, "learning_rate": 1.993715822718351e-05, "loss": 5.5743858337402346, "step": 470 }, { "epoch": 0.003874623636818611, "grad_norm": 1.2993165254592896, "learning_rate": 1.99355427574453e-05, "loss": 5.618197250366211, "step": 480 }, { "epoch": 0.003955344962585665, "grad_norm": 0.89766526222229, "learning_rate": 1.9933927287707083e-05, "loss": 5.633193588256836, "step": 490 }, { "epoch": 0.00403606628835272, "grad_norm": 1.6778578758239746, "learning_rate": 1.993231181796887e-05, "loss": 5.6704669952392575, "step": 500 }, { "epoch": 0.004116787614119774, "grad_norm": 1.5714060068130493, "learning_rate": 1.993069634823066e-05, "loss": 5.8513141632080075, "step": 510 }, { "epoch": 0.004197508939886829, "grad_norm": 1.8524656295776367, "learning_rate": 1.9929080878492446e-05, "loss": 6.253496932983398, "step": 520 }, { "epoch": 0.0042782302656538835, "grad_norm": 1.0393726825714111, "learning_rate": 1.992746540875423e-05, "loss": 5.59668197631836, "step": 530 }, { "epoch": 0.004358951591420937, "grad_norm": 1.117863655090332, "learning_rate": 1.992584993901602e-05, "loss": 6.61954345703125, "step": 540 }, { "epoch": 0.004439672917187992, "grad_norm": 1.3503742218017578, "learning_rate": 1.9924234469277806e-05, "loss": 5.538344955444336, "step": 550 }, { "epoch": 0.004520394242955046, "grad_norm": 1.413236141204834, "learning_rate": 1.9922618999539593e-05, "loss": 5.276099014282226, "step": 560 }, { "epoch": 0.004601115568722101, "grad_norm": 1.4035712480545044, "learning_rate": 1.9921003529801378e-05, "loss": 6.3458717346191404, "step": 570 }, { "epoch": 0.0046818368944891555, "grad_norm": 1.3933309316635132, "learning_rate": 1.991938806006317e-05, "loss": 5.8199207305908205, "step": 580 }, { "epoch": 0.004762558220256209, "grad_norm": 1.6060943603515625, "learning_rate": 1.9917772590324953e-05, "loss": 6.309993743896484, "step": 590 }, { "epoch": 0.004843279546023264, "grad_norm": 2.02018404006958, "learning_rate": 1.991615712058674e-05, "loss": 5.5388023376464846, "step": 600 }, { "epoch": 0.004924000871790318, "grad_norm": 2.5779569149017334, "learning_rate": 1.9914541650848525e-05, "loss": 5.664841842651367, "step": 610 }, { "epoch": 0.005004722197557373, "grad_norm": 1.6306579113006592, "learning_rate": 1.9912926181110316e-05, "loss": 5.574106216430664, "step": 620 }, { "epoch": 0.0050854435233244275, "grad_norm": 1.9700396060943604, "learning_rate": 1.99113107113721e-05, "loss": 4.947650146484375, "step": 630 }, { "epoch": 0.005166164849091481, "grad_norm": 1.8461012840270996, "learning_rate": 1.9909695241633888e-05, "loss": 5.425352096557617, "step": 640 }, { "epoch": 0.005246886174858536, "grad_norm": 1.0387723445892334, "learning_rate": 1.9908079771895673e-05, "loss": 4.999172592163086, "step": 650 }, { "epoch": 0.00532760750062559, "grad_norm": 1.5513010025024414, "learning_rate": 1.9906464302157464e-05, "loss": 5.768272781372071, "step": 660 }, { "epoch": 0.005408328826392645, "grad_norm": 1.6448099613189697, "learning_rate": 1.9904848832419248e-05, "loss": 5.3660125732421875, "step": 670 }, { "epoch": 0.005489050152159699, "grad_norm": 1.6896711587905884, "learning_rate": 1.9903233362681036e-05, "loss": 6.265217590332031, "step": 680 }, { "epoch": 0.005569771477926753, "grad_norm": 1.3046419620513916, "learning_rate": 1.9901617892942823e-05, "loss": 5.575093460083008, "step": 690 }, { "epoch": 0.005650492803693808, "grad_norm": 1.3765521049499512, "learning_rate": 1.990000242320461e-05, "loss": 5.535979461669922, "step": 700 }, { "epoch": 0.005731214129460862, "grad_norm": 0.9927377700805664, "learning_rate": 1.9898386953466395e-05, "loss": 5.140326690673828, "step": 710 }, { "epoch": 0.005811935455227917, "grad_norm": 2.6307711601257324, "learning_rate": 1.9896771483728183e-05, "loss": 5.89037971496582, "step": 720 }, { "epoch": 0.005892656780994971, "grad_norm": 0.9548347592353821, "learning_rate": 1.989515601398997e-05, "loss": 5.339773178100586, "step": 730 }, { "epoch": 0.005973378106762025, "grad_norm": 1.3688617944717407, "learning_rate": 1.989354054425176e-05, "loss": 5.210102462768555, "step": 740 }, { "epoch": 0.00605409943252908, "grad_norm": 0.943199634552002, "learning_rate": 1.9891925074513543e-05, "loss": 5.031628799438477, "step": 750 }, { "epoch": 0.006134820758296134, "grad_norm": 1.6071373224258423, "learning_rate": 1.989030960477533e-05, "loss": 5.149977111816407, "step": 760 }, { "epoch": 0.006215542084063189, "grad_norm": 2.471078634262085, "learning_rate": 1.9888694135037118e-05, "loss": 5.196367645263672, "step": 770 }, { "epoch": 0.006296263409830243, "grad_norm": 1.413161277770996, "learning_rate": 1.9887078665298906e-05, "loss": 5.490154266357422, "step": 780 }, { "epoch": 0.006376984735597297, "grad_norm": 2.186739444732666, "learning_rate": 1.988546319556069e-05, "loss": 4.991801452636719, "step": 790 }, { "epoch": 0.006457706061364352, "grad_norm": 1.9074265956878662, "learning_rate": 1.9883847725822478e-05, "loss": 5.260669326782226, "step": 800 }, { "epoch": 0.006538427387131406, "grad_norm": 2.3367815017700195, "learning_rate": 1.9882232256084265e-05, "loss": 4.851223373413086, "step": 810 }, { "epoch": 0.006619148712898461, "grad_norm": 1.7036857604980469, "learning_rate": 1.9880616786346053e-05, "loss": 5.369311141967773, "step": 820 }, { "epoch": 0.006699870038665515, "grad_norm": 0.9501758813858032, "learning_rate": 1.9879001316607837e-05, "loss": 5.30428466796875, "step": 830 }, { "epoch": 0.006780591364432569, "grad_norm": 1.7633531093597412, "learning_rate": 1.9877385846869625e-05, "loss": 5.417334365844726, "step": 840 }, { "epoch": 0.006861312690199624, "grad_norm": 2.3196890354156494, "learning_rate": 1.9875770377131413e-05, "loss": 5.767039489746094, "step": 850 }, { "epoch": 0.006942034015966678, "grad_norm": 1.8074249029159546, "learning_rate": 1.98741549073932e-05, "loss": 6.070658493041992, "step": 860 }, { "epoch": 0.007022755341733733, "grad_norm": 1.0068451166152954, "learning_rate": 1.9872539437654985e-05, "loss": 5.250255966186524, "step": 870 }, { "epoch": 0.007103476667500787, "grad_norm": 1.5293283462524414, "learning_rate": 1.9870923967916773e-05, "loss": 5.48786735534668, "step": 880 }, { "epoch": 0.007184197993267841, "grad_norm": 1.538093090057373, "learning_rate": 1.986930849817856e-05, "loss": 5.095742416381836, "step": 890 }, { "epoch": 0.007264919319034896, "grad_norm": 1.509695053100586, "learning_rate": 1.9867693028440348e-05, "loss": 5.022721481323242, "step": 900 }, { "epoch": 0.00734564064480195, "grad_norm": 2.1293861865997314, "learning_rate": 1.9866077558702132e-05, "loss": 5.294504928588867, "step": 910 }, { "epoch": 0.007426361970569005, "grad_norm": 3.075906276702881, "learning_rate": 1.986446208896392e-05, "loss": 6.010696792602539, "step": 920 }, { "epoch": 0.007507083296336059, "grad_norm": 1.5819621086120605, "learning_rate": 1.9862846619225708e-05, "loss": 5.29765739440918, "step": 930 }, { "epoch": 0.007587804622103113, "grad_norm": 2.057783842086792, "learning_rate": 1.9861231149487495e-05, "loss": 5.325683975219727, "step": 940 }, { "epoch": 0.007668525947870168, "grad_norm": 1.8234333992004395, "learning_rate": 1.985961567974928e-05, "loss": 5.058898162841797, "step": 950 }, { "epoch": 0.007749247273637222, "grad_norm": 1.0379469394683838, "learning_rate": 1.9858000210011067e-05, "loss": 5.005058288574219, "step": 960 }, { "epoch": 0.007829968599404276, "grad_norm": 1.2844942808151245, "learning_rate": 1.9856384740272855e-05, "loss": 4.680921173095703, "step": 970 }, { "epoch": 0.00791068992517133, "grad_norm": 1.1076339483261108, "learning_rate": 1.9854769270534643e-05, "loss": 4.901651382446289, "step": 980 }, { "epoch": 0.007991411250938385, "grad_norm": 1.4986525774002075, "learning_rate": 1.9853153800796427e-05, "loss": 5.167605972290039, "step": 990 }, { "epoch": 0.00807213257670544, "grad_norm": 1.3633522987365723, "learning_rate": 1.9851538331058215e-05, "loss": 5.5746513366699215, "step": 1000 }, { "epoch": 0.008152853902472494, "grad_norm": 0.6998064517974854, "learning_rate": 1.9849922861320002e-05, "loss": 4.386801147460938, "step": 1010 }, { "epoch": 0.008233575228239549, "grad_norm": 1.6105053424835205, "learning_rate": 1.984830739158179e-05, "loss": 5.665913391113281, "step": 1020 }, { "epoch": 0.008314296554006603, "grad_norm": 1.5643501281738281, "learning_rate": 1.9846691921843574e-05, "loss": 5.460854339599609, "step": 1030 }, { "epoch": 0.008395017879773658, "grad_norm": 1.0019451379776, "learning_rate": 1.9845076452105362e-05, "loss": 5.202972793579102, "step": 1040 }, { "epoch": 0.008475739205540712, "grad_norm": 1.0754708051681519, "learning_rate": 1.984346098236715e-05, "loss": 5.448361206054687, "step": 1050 }, { "epoch": 0.008556460531307767, "grad_norm": 1.5665514469146729, "learning_rate": 1.9841845512628937e-05, "loss": 4.864190673828125, "step": 1060 }, { "epoch": 0.00863718185707482, "grad_norm": 1.2935619354248047, "learning_rate": 1.9840230042890722e-05, "loss": 5.491910934448242, "step": 1070 }, { "epoch": 0.008717903182841874, "grad_norm": 2.163273334503174, "learning_rate": 1.983861457315251e-05, "loss": 5.1235107421875, "step": 1080 }, { "epoch": 0.008798624508608929, "grad_norm": 1.2351988554000854, "learning_rate": 1.9836999103414297e-05, "loss": 5.106401062011718, "step": 1090 }, { "epoch": 0.008879345834375984, "grad_norm": 0.9172519445419312, "learning_rate": 1.9835383633676085e-05, "loss": 4.713862609863281, "step": 1100 }, { "epoch": 0.008960067160143038, "grad_norm": 1.155143141746521, "learning_rate": 1.983376816393787e-05, "loss": 4.98199348449707, "step": 1110 }, { "epoch": 0.009040788485910093, "grad_norm": 1.5495681762695312, "learning_rate": 1.9832152694199657e-05, "loss": 5.016569137573242, "step": 1120 }, { "epoch": 0.009121509811677147, "grad_norm": 1.7745457887649536, "learning_rate": 1.9830537224461445e-05, "loss": 4.673791885375977, "step": 1130 }, { "epoch": 0.009202231137444202, "grad_norm": 2.1868464946746826, "learning_rate": 1.9828921754723232e-05, "loss": 4.895331573486328, "step": 1140 }, { "epoch": 0.009282952463211256, "grad_norm": 0.8666797280311584, "learning_rate": 1.9827306284985017e-05, "loss": 5.002678298950196, "step": 1150 }, { "epoch": 0.009363673788978311, "grad_norm": 1.233217716217041, "learning_rate": 1.9825690815246804e-05, "loss": 4.839445114135742, "step": 1160 }, { "epoch": 0.009444395114745364, "grad_norm": 1.1748716831207275, "learning_rate": 1.9824075345508592e-05, "loss": 4.672160720825195, "step": 1170 }, { "epoch": 0.009525116440512418, "grad_norm": 1.8727009296417236, "learning_rate": 1.982245987577038e-05, "loss": 4.878614044189453, "step": 1180 }, { "epoch": 0.009605837766279473, "grad_norm": 1.7943673133850098, "learning_rate": 1.9820844406032164e-05, "loss": 4.784691619873047, "step": 1190 }, { "epoch": 0.009686559092046527, "grad_norm": 1.1865836381912231, "learning_rate": 1.981922893629395e-05, "loss": 4.720969390869141, "step": 1200 }, { "epoch": 0.009767280417813582, "grad_norm": 1.0905866622924805, "learning_rate": 1.981761346655574e-05, "loss": 5.072229385375977, "step": 1210 }, { "epoch": 0.009848001743580637, "grad_norm": 1.2380354404449463, "learning_rate": 1.9815997996817527e-05, "loss": 4.910837554931641, "step": 1220 }, { "epoch": 0.009928723069347691, "grad_norm": 2.231370210647583, "learning_rate": 1.981438252707931e-05, "loss": 4.945140838623047, "step": 1230 }, { "epoch": 0.010009444395114746, "grad_norm": 0.877223014831543, "learning_rate": 1.98127670573411e-05, "loss": 4.455559158325196, "step": 1240 }, { "epoch": 0.0100901657208818, "grad_norm": 1.6550467014312744, "learning_rate": 1.9811151587602887e-05, "loss": 4.720668792724609, "step": 1250 }, { "epoch": 0.010170887046648855, "grad_norm": 0.9206938147544861, "learning_rate": 1.9809536117864674e-05, "loss": 5.161466979980469, "step": 1260 }, { "epoch": 0.010251608372415908, "grad_norm": 1.2905943393707275, "learning_rate": 1.980792064812646e-05, "loss": 5.022149658203125, "step": 1270 }, { "epoch": 0.010332329698182962, "grad_norm": 0.9491216540336609, "learning_rate": 1.9806305178388246e-05, "loss": 5.111972808837891, "step": 1280 }, { "epoch": 0.010413051023950017, "grad_norm": 1.3040118217468262, "learning_rate": 1.9804689708650034e-05, "loss": 5.07166862487793, "step": 1290 }, { "epoch": 0.010493772349717071, "grad_norm": 0.6517397165298462, "learning_rate": 1.9803074238911822e-05, "loss": 4.986681365966797, "step": 1300 }, { "epoch": 0.010574493675484126, "grad_norm": 1.7895499467849731, "learning_rate": 1.9801458769173606e-05, "loss": 5.152071762084961, "step": 1310 }, { "epoch": 0.01065521500125118, "grad_norm": 1.8562183380126953, "learning_rate": 1.9799843299435394e-05, "loss": 5.033748626708984, "step": 1320 }, { "epoch": 0.010735936327018235, "grad_norm": 9.420151710510254, "learning_rate": 1.979822782969718e-05, "loss": 4.918307113647461, "step": 1330 }, { "epoch": 0.01081665765278529, "grad_norm": 1.5096276998519897, "learning_rate": 1.979661235995897e-05, "loss": 5.066556167602539, "step": 1340 }, { "epoch": 0.010897378978552344, "grad_norm": 1.6742454767227173, "learning_rate": 1.9794996890220753e-05, "loss": 4.603495025634766, "step": 1350 }, { "epoch": 0.010978100304319399, "grad_norm": 1.5523711442947388, "learning_rate": 1.979338142048254e-05, "loss": 4.983476257324218, "step": 1360 }, { "epoch": 0.011058821630086452, "grad_norm": 1.0065193176269531, "learning_rate": 1.979176595074433e-05, "loss": 4.47662582397461, "step": 1370 }, { "epoch": 0.011139542955853506, "grad_norm": 1.2592872381210327, "learning_rate": 1.9790150481006117e-05, "loss": 4.7069957733154295, "step": 1380 }, { "epoch": 0.01122026428162056, "grad_norm": 1.4217849969863892, "learning_rate": 1.97885350112679e-05, "loss": 4.604948043823242, "step": 1390 }, { "epoch": 0.011300985607387615, "grad_norm": 2.3401284217834473, "learning_rate": 1.978691954152969e-05, "loss": 5.420458602905273, "step": 1400 }, { "epoch": 0.01138170693315467, "grad_norm": 1.0662442445755005, "learning_rate": 1.9785304071791476e-05, "loss": 4.485123062133789, "step": 1410 }, { "epoch": 0.011462428258921725, "grad_norm": 2.2591707706451416, "learning_rate": 1.9783688602053264e-05, "loss": 4.511461639404297, "step": 1420 }, { "epoch": 0.011543149584688779, "grad_norm": 1.161214828491211, "learning_rate": 1.9782073132315048e-05, "loss": 5.049435043334961, "step": 1430 }, { "epoch": 0.011623870910455834, "grad_norm": 1.4109967947006226, "learning_rate": 1.9780457662576836e-05, "loss": 4.879238128662109, "step": 1440 }, { "epoch": 0.011704592236222888, "grad_norm": 1.0740419626235962, "learning_rate": 1.9778842192838624e-05, "loss": 4.8542133331298825, "step": 1450 }, { "epoch": 0.011785313561989943, "grad_norm": 1.3127236366271973, "learning_rate": 1.977722672310041e-05, "loss": 4.777730941772461, "step": 1460 }, { "epoch": 0.011866034887756997, "grad_norm": 1.3703532218933105, "learning_rate": 1.97756112533622e-05, "loss": 5.405302047729492, "step": 1470 }, { "epoch": 0.01194675621352405, "grad_norm": 2.3768866062164307, "learning_rate": 1.9773995783623983e-05, "loss": 4.822198867797852, "step": 1480 }, { "epoch": 0.012027477539291105, "grad_norm": 1.4438884258270264, "learning_rate": 1.9772380313885774e-05, "loss": 4.766239166259766, "step": 1490 }, { "epoch": 0.01210819886505816, "grad_norm": 1.1307283639907837, "learning_rate": 1.977076484414756e-05, "loss": 4.5920158386230465, "step": 1500 }, { "epoch": 0.012188920190825214, "grad_norm": 1.6928008794784546, "learning_rate": 1.9769149374409346e-05, "loss": 4.275356674194336, "step": 1510 }, { "epoch": 0.012269641516592268, "grad_norm": 1.886687994003296, "learning_rate": 1.976753390467113e-05, "loss": 4.970389556884766, "step": 1520 }, { "epoch": 0.012350362842359323, "grad_norm": 1.2382768392562866, "learning_rate": 1.9765918434932922e-05, "loss": 4.825216674804688, "step": 1530 }, { "epoch": 0.012431084168126378, "grad_norm": 1.5056403875350952, "learning_rate": 1.9764302965194706e-05, "loss": 4.660939407348633, "step": 1540 }, { "epoch": 0.012511805493893432, "grad_norm": 1.3904926776885986, "learning_rate": 1.9762687495456494e-05, "loss": 5.746333312988281, "step": 1550 }, { "epoch": 0.012592526819660487, "grad_norm": 2.1274197101593018, "learning_rate": 1.976107202571828e-05, "loss": 5.276415634155273, "step": 1560 }, { "epoch": 0.012673248145427541, "grad_norm": 1.6082332134246826, "learning_rate": 1.975945655598007e-05, "loss": 4.680088806152344, "step": 1570 }, { "epoch": 0.012753969471194594, "grad_norm": 0.9629959464073181, "learning_rate": 1.9757841086241853e-05, "loss": 5.28907585144043, "step": 1580 }, { "epoch": 0.012834690796961649, "grad_norm": 1.4751808643341064, "learning_rate": 1.975622561650364e-05, "loss": 4.636224365234375, "step": 1590 }, { "epoch": 0.012915412122728703, "grad_norm": 2.3204569816589355, "learning_rate": 1.975461014676543e-05, "loss": 5.142553329467773, "step": 1600 }, { "epoch": 0.012996133448495758, "grad_norm": 0.9987376928329468, "learning_rate": 1.9752994677027217e-05, "loss": 4.940226745605469, "step": 1610 }, { "epoch": 0.013076854774262812, "grad_norm": 1.4052804708480835, "learning_rate": 1.9751379207289e-05, "loss": 4.409165573120117, "step": 1620 }, { "epoch": 0.013157576100029867, "grad_norm": 1.4073339700698853, "learning_rate": 1.974976373755079e-05, "loss": 5.178488540649414, "step": 1630 }, { "epoch": 0.013238297425796922, "grad_norm": 1.4444657564163208, "learning_rate": 1.9748148267812576e-05, "loss": 4.769265365600586, "step": 1640 }, { "epoch": 0.013319018751563976, "grad_norm": 1.3476840257644653, "learning_rate": 1.9746532798074364e-05, "loss": 5.052593612670899, "step": 1650 }, { "epoch": 0.01339974007733103, "grad_norm": 0.8617321848869324, "learning_rate": 1.9744917328336148e-05, "loss": 4.6245983123779295, "step": 1660 }, { "epoch": 0.013480461403098085, "grad_norm": 1.356015920639038, "learning_rate": 1.9743301858597936e-05, "loss": 4.7076171875, "step": 1670 }, { "epoch": 0.013561182728865138, "grad_norm": 0.8423445224761963, "learning_rate": 1.9741686388859724e-05, "loss": 5.298031616210937, "step": 1680 }, { "epoch": 0.013641904054632193, "grad_norm": 1.4269553422927856, "learning_rate": 1.974007091912151e-05, "loss": 4.156510543823242, "step": 1690 }, { "epoch": 0.013722625380399247, "grad_norm": 1.327349305152893, "learning_rate": 1.9738455449383296e-05, "loss": 4.790211486816406, "step": 1700 }, { "epoch": 0.013803346706166302, "grad_norm": 1.145967960357666, "learning_rate": 1.9736839979645083e-05, "loss": 5.426899719238281, "step": 1710 }, { "epoch": 0.013884068031933356, "grad_norm": 1.3912030458450317, "learning_rate": 1.973522450990687e-05, "loss": 4.426843643188477, "step": 1720 }, { "epoch": 0.013964789357700411, "grad_norm": 0.8256801962852478, "learning_rate": 1.973360904016866e-05, "loss": 5.58739128112793, "step": 1730 }, { "epoch": 0.014045510683467466, "grad_norm": 1.762925148010254, "learning_rate": 1.9731993570430443e-05, "loss": 5.03930778503418, "step": 1740 }, { "epoch": 0.01412623200923452, "grad_norm": 1.1882147789001465, "learning_rate": 1.973037810069223e-05, "loss": 4.75993537902832, "step": 1750 }, { "epoch": 0.014206953335001575, "grad_norm": 1.0589845180511475, "learning_rate": 1.972876263095402e-05, "loss": 4.2397300720214846, "step": 1760 }, { "epoch": 0.01428767466076863, "grad_norm": 1.849997878074646, "learning_rate": 1.9727147161215806e-05, "loss": 4.705167007446289, "step": 1770 }, { "epoch": 0.014368395986535682, "grad_norm": 1.352772831916809, "learning_rate": 1.972553169147759e-05, "loss": 5.173005294799805, "step": 1780 }, { "epoch": 0.014449117312302737, "grad_norm": 0.9870211482048035, "learning_rate": 1.9723916221739378e-05, "loss": 4.4591064453125, "step": 1790 }, { "epoch": 0.014529838638069791, "grad_norm": 1.0916539430618286, "learning_rate": 1.9722300752001166e-05, "loss": 5.314525985717774, "step": 1800 }, { "epoch": 0.014610559963836846, "grad_norm": 0.999883770942688, "learning_rate": 1.9720685282262953e-05, "loss": 4.617046737670899, "step": 1810 }, { "epoch": 0.0146912812896039, "grad_norm": 1.3336068391799927, "learning_rate": 1.9719069812524738e-05, "loss": 4.699541473388672, "step": 1820 }, { "epoch": 0.014772002615370955, "grad_norm": 2.9386978149414062, "learning_rate": 1.9717454342786525e-05, "loss": 5.170671463012695, "step": 1830 }, { "epoch": 0.01485272394113801, "grad_norm": 0.7553975582122803, "learning_rate": 1.9715838873048313e-05, "loss": 4.684725570678711, "step": 1840 }, { "epoch": 0.014933445266905064, "grad_norm": 1.8531930446624756, "learning_rate": 1.97142234033101e-05, "loss": 4.405824661254883, "step": 1850 }, { "epoch": 0.015014166592672119, "grad_norm": 1.6814180612564087, "learning_rate": 1.9712607933571885e-05, "loss": 5.5620170593261715, "step": 1860 }, { "epoch": 0.015094887918439173, "grad_norm": 1.6623845100402832, "learning_rate": 1.9710992463833673e-05, "loss": 4.525441360473633, "step": 1870 }, { "epoch": 0.015175609244206226, "grad_norm": 2.0976991653442383, "learning_rate": 1.970937699409546e-05, "loss": 4.697515487670898, "step": 1880 }, { "epoch": 0.01525633056997328, "grad_norm": 1.6893714666366577, "learning_rate": 1.9707761524357248e-05, "loss": 4.546325302124023, "step": 1890 }, { "epoch": 0.015337051895740335, "grad_norm": 1.8773167133331299, "learning_rate": 1.9706146054619033e-05, "loss": 5.228215026855469, "step": 1900 }, { "epoch": 0.01541777322150739, "grad_norm": 0.7116371989250183, "learning_rate": 1.970453058488082e-05, "loss": 4.832821655273437, "step": 1910 }, { "epoch": 0.015498494547274444, "grad_norm": 1.0682133436203003, "learning_rate": 1.9702915115142608e-05, "loss": 4.934266662597656, "step": 1920 }, { "epoch": 0.015579215873041499, "grad_norm": 1.2470345497131348, "learning_rate": 1.9701299645404396e-05, "loss": 4.945410919189453, "step": 1930 }, { "epoch": 0.01565993719880855, "grad_norm": 1.2420868873596191, "learning_rate": 1.969968417566618e-05, "loss": 5.4785198211669925, "step": 1940 }, { "epoch": 0.015740658524575606, "grad_norm": 1.1343059539794922, "learning_rate": 1.9698068705927968e-05, "loss": 4.424651718139648, "step": 1950 }, { "epoch": 0.01582137985034266, "grad_norm": 2.261370897293091, "learning_rate": 1.9696453236189755e-05, "loss": 5.2899627685546875, "step": 1960 }, { "epoch": 0.015902101176109715, "grad_norm": 1.1543649435043335, "learning_rate": 1.9694837766451543e-05, "loss": 4.969830322265625, "step": 1970 }, { "epoch": 0.01598282250187677, "grad_norm": 1.2805522680282593, "learning_rate": 1.9693222296713327e-05, "loss": 4.516814422607422, "step": 1980 }, { "epoch": 0.016063543827643825, "grad_norm": 1.1825766563415527, "learning_rate": 1.9691606826975115e-05, "loss": 4.750716018676758, "step": 1990 }, { "epoch": 0.01614426515341088, "grad_norm": 1.7707457542419434, "learning_rate": 1.9689991357236903e-05, "loss": 4.467962646484375, "step": 2000 }, { "epoch": 0.016224986479177934, "grad_norm": 1.1550182104110718, "learning_rate": 1.968837588749869e-05, "loss": 4.6878173828125, "step": 2010 }, { "epoch": 0.01630570780494499, "grad_norm": 0.9064394235610962, "learning_rate": 1.9686760417760475e-05, "loss": 4.530033874511719, "step": 2020 }, { "epoch": 0.016386429130712043, "grad_norm": 0.7322325706481934, "learning_rate": 1.9685144948022262e-05, "loss": 4.286885452270508, "step": 2030 }, { "epoch": 0.016467150456479097, "grad_norm": 1.3097376823425293, "learning_rate": 1.968352947828405e-05, "loss": 4.687649154663086, "step": 2040 }, { "epoch": 0.016547871782246152, "grad_norm": 1.2164967060089111, "learning_rate": 1.9681914008545838e-05, "loss": 4.590193176269532, "step": 2050 }, { "epoch": 0.016628593108013207, "grad_norm": 1.1107456684112549, "learning_rate": 1.9680298538807622e-05, "loss": 4.850758361816406, "step": 2060 }, { "epoch": 0.01670931443378026, "grad_norm": 1.648370623588562, "learning_rate": 1.967868306906941e-05, "loss": 4.733086013793946, "step": 2070 }, { "epoch": 0.016790035759547316, "grad_norm": 1.2390748262405396, "learning_rate": 1.9677067599331197e-05, "loss": 4.741577529907227, "step": 2080 }, { "epoch": 0.01687075708531437, "grad_norm": 1.6130567789077759, "learning_rate": 1.9675452129592985e-05, "loss": 4.782355499267578, "step": 2090 }, { "epoch": 0.016951478411081425, "grad_norm": 1.505465030670166, "learning_rate": 1.967383665985477e-05, "loss": 4.6838829040527346, "step": 2100 }, { "epoch": 0.01703219973684848, "grad_norm": 1.2312997579574585, "learning_rate": 1.9672221190116557e-05, "loss": 4.492626571655274, "step": 2110 }, { "epoch": 0.017112921062615534, "grad_norm": 1.3827775716781616, "learning_rate": 1.9670605720378345e-05, "loss": 4.978173065185547, "step": 2120 }, { "epoch": 0.017193642388382585, "grad_norm": 1.829370141029358, "learning_rate": 1.9668990250640133e-05, "loss": 4.651539993286133, "step": 2130 }, { "epoch": 0.01727436371414964, "grad_norm": 1.5973265171051025, "learning_rate": 1.9667374780901917e-05, "loss": 4.342641830444336, "step": 2140 }, { "epoch": 0.017355085039916694, "grad_norm": 1.206342339515686, "learning_rate": 1.9665759311163704e-05, "loss": 4.533686828613281, "step": 2150 }, { "epoch": 0.01743580636568375, "grad_norm": 1.7375551462173462, "learning_rate": 1.9664143841425492e-05, "loss": 4.671933364868164, "step": 2160 }, { "epoch": 0.017516527691450803, "grad_norm": 0.928835391998291, "learning_rate": 1.966252837168728e-05, "loss": 4.3294219970703125, "step": 2170 }, { "epoch": 0.017597249017217858, "grad_norm": 0.9663963317871094, "learning_rate": 1.9660912901949064e-05, "loss": 4.9327545166015625, "step": 2180 }, { "epoch": 0.017677970342984912, "grad_norm": 1.1467374563217163, "learning_rate": 1.9659297432210852e-05, "loss": 4.782512283325195, "step": 2190 }, { "epoch": 0.017758691668751967, "grad_norm": 1.8887301683425903, "learning_rate": 1.965768196247264e-05, "loss": 4.395632553100586, "step": 2200 }, { "epoch": 0.01783941299451902, "grad_norm": 0.8893512487411499, "learning_rate": 1.9656066492734427e-05, "loss": 4.627455520629883, "step": 2210 }, { "epoch": 0.017920134320286076, "grad_norm": 1.5465189218521118, "learning_rate": 1.965445102299621e-05, "loss": 4.44348030090332, "step": 2220 }, { "epoch": 0.01800085564605313, "grad_norm": 1.0052744150161743, "learning_rate": 1.9652835553258e-05, "loss": 4.435536193847656, "step": 2230 }, { "epoch": 0.018081576971820185, "grad_norm": 1.3823529481887817, "learning_rate": 1.9651220083519787e-05, "loss": 4.6699951171875, "step": 2240 }, { "epoch": 0.01816229829758724, "grad_norm": 1.1698533296585083, "learning_rate": 1.9649604613781575e-05, "loss": 5.228911972045898, "step": 2250 }, { "epoch": 0.018243019623354294, "grad_norm": 1.6364613771438599, "learning_rate": 1.964798914404336e-05, "loss": 4.474937057495117, "step": 2260 }, { "epoch": 0.01832374094912135, "grad_norm": 1.1150562763214111, "learning_rate": 1.9646373674305147e-05, "loss": 4.392767333984375, "step": 2270 }, { "epoch": 0.018404462274888404, "grad_norm": 1.6234192848205566, "learning_rate": 1.9644758204566934e-05, "loss": 4.045706939697266, "step": 2280 }, { "epoch": 0.018485183600655458, "grad_norm": 0.8739916682243347, "learning_rate": 1.9643142734828722e-05, "loss": 3.9124332427978517, "step": 2290 }, { "epoch": 0.018565904926422513, "grad_norm": 1.5652949810028076, "learning_rate": 1.9641527265090506e-05, "loss": 4.218809509277344, "step": 2300 }, { "epoch": 0.018646626252189567, "grad_norm": 1.1321407556533813, "learning_rate": 1.9639911795352294e-05, "loss": 4.532928848266602, "step": 2310 }, { "epoch": 0.018727347577956622, "grad_norm": 1.3396732807159424, "learning_rate": 1.9638296325614082e-05, "loss": 3.9488868713378906, "step": 2320 }, { "epoch": 0.018808068903723676, "grad_norm": 1.0211687088012695, "learning_rate": 1.963668085587587e-05, "loss": 4.809571838378906, "step": 2330 }, { "epoch": 0.018888790229490728, "grad_norm": 1.850880742073059, "learning_rate": 1.9635065386137654e-05, "loss": 4.768941879272461, "step": 2340 }, { "epoch": 0.018969511555257782, "grad_norm": 1.0890032052993774, "learning_rate": 1.963344991639944e-05, "loss": 4.9358062744140625, "step": 2350 }, { "epoch": 0.019050232881024837, "grad_norm": 1.102660894393921, "learning_rate": 1.963183444666123e-05, "loss": 3.942164993286133, "step": 2360 }, { "epoch": 0.01913095420679189, "grad_norm": 1.0964233875274658, "learning_rate": 1.9630218976923017e-05, "loss": 5.853545761108398, "step": 2370 }, { "epoch": 0.019211675532558946, "grad_norm": 1.3031436204910278, "learning_rate": 1.96286035071848e-05, "loss": 4.632845306396485, "step": 2380 }, { "epoch": 0.019292396858326, "grad_norm": 1.4037084579467773, "learning_rate": 1.962698803744659e-05, "loss": 4.450349426269531, "step": 2390 }, { "epoch": 0.019373118184093055, "grad_norm": 1.5335683822631836, "learning_rate": 1.9625372567708376e-05, "loss": 4.9290424346923825, "step": 2400 }, { "epoch": 0.01945383950986011, "grad_norm": 1.5462621450424194, "learning_rate": 1.9623757097970164e-05, "loss": 4.24017448425293, "step": 2410 }, { "epoch": 0.019534560835627164, "grad_norm": 1.0589641332626343, "learning_rate": 1.962214162823195e-05, "loss": 3.8293987274169923, "step": 2420 }, { "epoch": 0.01961528216139422, "grad_norm": 1.2925102710723877, "learning_rate": 1.962052615849374e-05, "loss": 5.14282455444336, "step": 2430 }, { "epoch": 0.019696003487161273, "grad_norm": 1.227390170097351, "learning_rate": 1.9618910688755524e-05, "loss": 5.655985260009766, "step": 2440 }, { "epoch": 0.019776724812928328, "grad_norm": 1.186826467514038, "learning_rate": 1.961729521901731e-05, "loss": 4.417516708374023, "step": 2450 }, { "epoch": 0.019857446138695382, "grad_norm": 1.421464443206787, "learning_rate": 1.9615679749279096e-05, "loss": 4.8032489776611325, "step": 2460 }, { "epoch": 0.019938167464462437, "grad_norm": 1.2767060995101929, "learning_rate": 1.9614064279540887e-05, "loss": 4.642615890502929, "step": 2470 }, { "epoch": 0.02001888879022949, "grad_norm": 1.5728005170822144, "learning_rate": 1.961244880980267e-05, "loss": 4.017352294921875, "step": 2480 }, { "epoch": 0.020099610115996546, "grad_norm": 1.4521260261535645, "learning_rate": 1.961083334006446e-05, "loss": 4.538410949707031, "step": 2490 }, { "epoch": 0.0201803314417636, "grad_norm": 0.9765028953552246, "learning_rate": 1.9609217870326243e-05, "loss": 4.313709259033203, "step": 2500 }, { "epoch": 0.020261052767530655, "grad_norm": 0.8355658650398254, "learning_rate": 1.9607602400588034e-05, "loss": 4.057535552978516, "step": 2510 }, { "epoch": 0.02034177409329771, "grad_norm": 1.2434446811676025, "learning_rate": 1.960598693084982e-05, "loss": 4.263953781127929, "step": 2520 }, { "epoch": 0.020422495419064764, "grad_norm": 1.0655361413955688, "learning_rate": 1.9604371461111606e-05, "loss": 4.414466094970703, "step": 2530 }, { "epoch": 0.020503216744831815, "grad_norm": 1.225558876991272, "learning_rate": 1.960275599137339e-05, "loss": 4.642972183227539, "step": 2540 }, { "epoch": 0.02058393807059887, "grad_norm": 1.7446657419204712, "learning_rate": 1.9601140521635182e-05, "loss": 4.062141799926758, "step": 2550 }, { "epoch": 0.020664659396365925, "grad_norm": 1.358566403388977, "learning_rate": 1.9599525051896966e-05, "loss": 4.555065155029297, "step": 2560 }, { "epoch": 0.02074538072213298, "grad_norm": 1.10065758228302, "learning_rate": 1.9597909582158754e-05, "loss": 4.639959335327148, "step": 2570 }, { "epoch": 0.020826102047900034, "grad_norm": 1.0884294509887695, "learning_rate": 1.9596294112420538e-05, "loss": 4.288612365722656, "step": 2580 }, { "epoch": 0.02090682337366709, "grad_norm": 1.1814773082733154, "learning_rate": 1.959467864268233e-05, "loss": 4.5220386505126955, "step": 2590 }, { "epoch": 0.020987544699434143, "grad_norm": 0.956712543964386, "learning_rate": 1.9593063172944113e-05, "loss": 4.563016510009765, "step": 2600 }, { "epoch": 0.021068266025201197, "grad_norm": 1.505356788635254, "learning_rate": 1.95914477032059e-05, "loss": 4.558944320678711, "step": 2610 }, { "epoch": 0.021148987350968252, "grad_norm": 1.0555118322372437, "learning_rate": 1.958983223346769e-05, "loss": 4.3801532745361325, "step": 2620 }, { "epoch": 0.021229708676735307, "grad_norm": 1.0618782043457031, "learning_rate": 1.9588216763729476e-05, "loss": 4.616811370849609, "step": 2630 }, { "epoch": 0.02131043000250236, "grad_norm": 0.7060980200767517, "learning_rate": 1.958660129399126e-05, "loss": 4.1966392517089846, "step": 2640 }, { "epoch": 0.021391151328269416, "grad_norm": 1.105555534362793, "learning_rate": 1.958498582425305e-05, "loss": 4.554911422729492, "step": 2650 }, { "epoch": 0.02147187265403647, "grad_norm": 1.2110084295272827, "learning_rate": 1.9583370354514836e-05, "loss": 4.235490798950195, "step": 2660 }, { "epoch": 0.021552593979803525, "grad_norm": 1.253963589668274, "learning_rate": 1.9581754884776624e-05, "loss": 4.512850570678711, "step": 2670 }, { "epoch": 0.02163331530557058, "grad_norm": 1.298062801361084, "learning_rate": 1.9580139415038408e-05, "loss": 3.9132835388183596, "step": 2680 }, { "epoch": 0.021714036631337634, "grad_norm": 1.6135417222976685, "learning_rate": 1.9578523945300196e-05, "loss": 4.008386611938477, "step": 2690 }, { "epoch": 0.02179475795710469, "grad_norm": 1.2345529794692993, "learning_rate": 1.9576908475561984e-05, "loss": 5.381795883178711, "step": 2700 }, { "epoch": 0.021875479282871743, "grad_norm": 0.7419474720954895, "learning_rate": 1.957529300582377e-05, "loss": 4.537158966064453, "step": 2710 }, { "epoch": 0.021956200608638798, "grad_norm": 1.0989296436309814, "learning_rate": 1.957367753608556e-05, "loss": 4.120376968383789, "step": 2720 }, { "epoch": 0.022036921934405852, "grad_norm": 1.0885448455810547, "learning_rate": 1.9572062066347343e-05, "loss": 4.42515869140625, "step": 2730 }, { "epoch": 0.022117643260172903, "grad_norm": 1.6120877265930176, "learning_rate": 1.957044659660913e-05, "loss": 4.266778182983399, "step": 2740 }, { "epoch": 0.022198364585939958, "grad_norm": 0.8910641074180603, "learning_rate": 1.956883112687092e-05, "loss": 4.5727790832519535, "step": 2750 }, { "epoch": 0.022279085911707013, "grad_norm": 0.833705723285675, "learning_rate": 1.9567215657132706e-05, "loss": 3.986185836791992, "step": 2760 }, { "epoch": 0.022359807237474067, "grad_norm": 0.7064257264137268, "learning_rate": 1.956560018739449e-05, "loss": 4.022796630859375, "step": 2770 }, { "epoch": 0.02244052856324112, "grad_norm": 1.2828689813613892, "learning_rate": 1.956398471765628e-05, "loss": 4.657814025878906, "step": 2780 }, { "epoch": 0.022521249889008176, "grad_norm": 1.343841314315796, "learning_rate": 1.9562369247918066e-05, "loss": 4.544038009643555, "step": 2790 }, { "epoch": 0.02260197121477523, "grad_norm": 1.9445909261703491, "learning_rate": 1.9560753778179854e-05, "loss": 4.897145462036133, "step": 2800 }, { "epoch": 0.022682692540542285, "grad_norm": 1.30354905128479, "learning_rate": 1.9559138308441638e-05, "loss": 4.304722595214844, "step": 2810 }, { "epoch": 0.02276341386630934, "grad_norm": 1.1695384979248047, "learning_rate": 1.9557522838703426e-05, "loss": 4.39862289428711, "step": 2820 }, { "epoch": 0.022844135192076395, "grad_norm": 0.8554438948631287, "learning_rate": 1.9555907368965213e-05, "loss": 4.14953842163086, "step": 2830 }, { "epoch": 0.02292485651784345, "grad_norm": 1.2409034967422485, "learning_rate": 1.9554291899227e-05, "loss": 4.4796600341796875, "step": 2840 }, { "epoch": 0.023005577843610504, "grad_norm": 1.090754508972168, "learning_rate": 1.9552676429488785e-05, "loss": 4.41173095703125, "step": 2850 }, { "epoch": 0.023086299169377558, "grad_norm": 1.2528127431869507, "learning_rate": 1.9551060959750573e-05, "loss": 4.674258041381836, "step": 2860 }, { "epoch": 0.023167020495144613, "grad_norm": 1.4996005296707153, "learning_rate": 1.954944549001236e-05, "loss": 4.207735061645508, "step": 2870 }, { "epoch": 0.023247741820911667, "grad_norm": 1.0577125549316406, "learning_rate": 1.954783002027415e-05, "loss": 5.011627960205078, "step": 2880 }, { "epoch": 0.023328463146678722, "grad_norm": 1.5110156536102295, "learning_rate": 1.9546214550535933e-05, "loss": 4.302392959594727, "step": 2890 }, { "epoch": 0.023409184472445776, "grad_norm": 2.017362594604492, "learning_rate": 1.954459908079772e-05, "loss": 4.413845825195312, "step": 2900 }, { "epoch": 0.02348990579821283, "grad_norm": 0.8764426708221436, "learning_rate": 1.9542983611059508e-05, "loss": 4.517938232421875, "step": 2910 }, { "epoch": 0.023570627123979886, "grad_norm": 2.5458767414093018, "learning_rate": 1.9541368141321296e-05, "loss": 4.8132377624511715, "step": 2920 }, { "epoch": 0.02365134844974694, "grad_norm": 0.9178889393806458, "learning_rate": 1.953975267158308e-05, "loss": 4.1978271484375, "step": 2930 }, { "epoch": 0.023732069775513995, "grad_norm": 0.9978417158126831, "learning_rate": 1.9538137201844868e-05, "loss": 4.064857482910156, "step": 2940 }, { "epoch": 0.023812791101281046, "grad_norm": 1.7453441619873047, "learning_rate": 1.9536521732106656e-05, "loss": 4.27537956237793, "step": 2950 }, { "epoch": 0.0238935124270481, "grad_norm": 1.1150844097137451, "learning_rate": 1.9534906262368443e-05, "loss": 4.787469863891602, "step": 2960 }, { "epoch": 0.023974233752815155, "grad_norm": 3.24135422706604, "learning_rate": 1.9533290792630228e-05, "loss": 4.453439712524414, "step": 2970 }, { "epoch": 0.02405495507858221, "grad_norm": 1.7816437482833862, "learning_rate": 1.9531675322892015e-05, "loss": 4.920392990112305, "step": 2980 }, { "epoch": 0.024135676404349264, "grad_norm": 1.5778759717941284, "learning_rate": 1.9530059853153803e-05, "loss": 4.326131439208984, "step": 2990 }, { "epoch": 0.02421639773011632, "grad_norm": 1.3172669410705566, "learning_rate": 1.952844438341559e-05, "loss": 4.481901931762695, "step": 3000 }, { "epoch": 0.024297119055883373, "grad_norm": 1.2631902694702148, "learning_rate": 1.9526828913677375e-05, "loss": 4.933498764038086, "step": 3010 }, { "epoch": 0.024377840381650428, "grad_norm": 1.3694018125534058, "learning_rate": 1.9525213443939163e-05, "loss": 4.12878532409668, "step": 3020 }, { "epoch": 0.024458561707417482, "grad_norm": 1.7069928646087646, "learning_rate": 1.952359797420095e-05, "loss": 4.152912139892578, "step": 3030 }, { "epoch": 0.024539283033184537, "grad_norm": 1.7218422889709473, "learning_rate": 1.9521982504462738e-05, "loss": 4.5320892333984375, "step": 3040 }, { "epoch": 0.02462000435895159, "grad_norm": 1.7334256172180176, "learning_rate": 1.9520367034724522e-05, "loss": 4.514708709716797, "step": 3050 }, { "epoch": 0.024700725684718646, "grad_norm": 1.0844740867614746, "learning_rate": 1.951875156498631e-05, "loss": 4.501793289184571, "step": 3060 }, { "epoch": 0.0247814470104857, "grad_norm": 1.0005642175674438, "learning_rate": 1.9517136095248098e-05, "loss": 4.715765762329101, "step": 3070 }, { "epoch": 0.024862168336252755, "grad_norm": 0.8889300227165222, "learning_rate": 1.9515520625509885e-05, "loss": 4.042214965820312, "step": 3080 }, { "epoch": 0.02494288966201981, "grad_norm": 1.0354474782943726, "learning_rate": 1.951390515577167e-05, "loss": 4.321072387695312, "step": 3090 }, { "epoch": 0.025023610987786864, "grad_norm": 1.4672093391418457, "learning_rate": 1.9512289686033457e-05, "loss": 5.055201721191406, "step": 3100 }, { "epoch": 0.02510433231355392, "grad_norm": 1.2758092880249023, "learning_rate": 1.9510674216295245e-05, "loss": 4.302978134155273, "step": 3110 }, { "epoch": 0.025185053639320974, "grad_norm": 1.050675392150879, "learning_rate": 1.9509058746557033e-05, "loss": 4.464220809936523, "step": 3120 }, { "epoch": 0.025265774965088028, "grad_norm": 1.6096774339675903, "learning_rate": 1.9507443276818817e-05, "loss": 4.651048278808593, "step": 3130 }, { "epoch": 0.025346496290855083, "grad_norm": 1.2324535846710205, "learning_rate": 1.9505827807080605e-05, "loss": 4.138620376586914, "step": 3140 }, { "epoch": 0.025427217616622134, "grad_norm": 1.2281434535980225, "learning_rate": 1.9504212337342392e-05, "loss": 4.380453109741211, "step": 3150 }, { "epoch": 0.02550793894238919, "grad_norm": 0.9950925707817078, "learning_rate": 1.950259686760418e-05, "loss": 3.936243438720703, "step": 3160 }, { "epoch": 0.025588660268156243, "grad_norm": 0.748000979423523, "learning_rate": 1.9500981397865964e-05, "loss": 4.855133819580078, "step": 3170 }, { "epoch": 0.025669381593923297, "grad_norm": 1.888379693031311, "learning_rate": 1.9499365928127752e-05, "loss": 4.535345458984375, "step": 3180 }, { "epoch": 0.025750102919690352, "grad_norm": 1.2873598337173462, "learning_rate": 1.949775045838954e-05, "loss": 4.211750030517578, "step": 3190 }, { "epoch": 0.025830824245457407, "grad_norm": 0.852738082408905, "learning_rate": 1.9496134988651328e-05, "loss": 4.219026184082031, "step": 3200 }, { "epoch": 0.02591154557122446, "grad_norm": 1.1952135562896729, "learning_rate": 1.9494519518913112e-05, "loss": 4.56927375793457, "step": 3210 }, { "epoch": 0.025992266896991516, "grad_norm": 1.802846074104309, "learning_rate": 1.94929040491749e-05, "loss": 3.7937347412109377, "step": 3220 }, { "epoch": 0.02607298822275857, "grad_norm": 1.3536405563354492, "learning_rate": 1.9491288579436687e-05, "loss": 4.642712020874024, "step": 3230 }, { "epoch": 0.026153709548525625, "grad_norm": 1.25641667842865, "learning_rate": 1.9489673109698475e-05, "loss": 4.189925765991211, "step": 3240 }, { "epoch": 0.02623443087429268, "grad_norm": 1.166529893875122, "learning_rate": 1.948805763996026e-05, "loss": 3.887537384033203, "step": 3250 }, { "epoch": 0.026315152200059734, "grad_norm": 2.3406972885131836, "learning_rate": 1.9486442170222047e-05, "loss": 4.6502525329589846, "step": 3260 }, { "epoch": 0.02639587352582679, "grad_norm": 2.470658779144287, "learning_rate": 1.9484826700483835e-05, "loss": 4.3313640594482425, "step": 3270 }, { "epoch": 0.026476594851593843, "grad_norm": 2.5139923095703125, "learning_rate": 1.9483211230745622e-05, "loss": 4.815103912353516, "step": 3280 }, { "epoch": 0.026557316177360898, "grad_norm": 1.238246202468872, "learning_rate": 1.9481595761007407e-05, "loss": 4.205559539794922, "step": 3290 }, { "epoch": 0.026638037503127952, "grad_norm": 0.9554158449172974, "learning_rate": 1.9479980291269194e-05, "loss": 3.7399219512939452, "step": 3300 }, { "epoch": 0.026718758828895007, "grad_norm": 1.1354374885559082, "learning_rate": 1.9478364821530982e-05, "loss": 4.775108337402344, "step": 3310 }, { "epoch": 0.02679948015466206, "grad_norm": 1.4870502948760986, "learning_rate": 1.947674935179277e-05, "loss": 4.160108947753907, "step": 3320 }, { "epoch": 0.026880201480429116, "grad_norm": 1.200606346130371, "learning_rate": 1.9475133882054554e-05, "loss": 4.2458648681640625, "step": 3330 }, { "epoch": 0.02696092280619617, "grad_norm": 1.5908054113388062, "learning_rate": 1.9473518412316345e-05, "loss": 4.136464691162109, "step": 3340 }, { "epoch": 0.02704164413196322, "grad_norm": 1.2675144672393799, "learning_rate": 1.947190294257813e-05, "loss": 3.9521060943603517, "step": 3350 }, { "epoch": 0.027122365457730276, "grad_norm": 1.139556646347046, "learning_rate": 1.9470287472839917e-05, "loss": 3.8772369384765626, "step": 3360 }, { "epoch": 0.02720308678349733, "grad_norm": 1.1515920162200928, "learning_rate": 1.94686720031017e-05, "loss": 3.8213783264160157, "step": 3370 }, { "epoch": 0.027283808109264385, "grad_norm": 1.2653449773788452, "learning_rate": 1.9467056533363492e-05, "loss": 3.9583503723144533, "step": 3380 }, { "epoch": 0.02736452943503144, "grad_norm": 1.3468886613845825, "learning_rate": 1.9465441063625277e-05, "loss": 4.56561164855957, "step": 3390 }, { "epoch": 0.027445250760798495, "grad_norm": 0.7031143307685852, "learning_rate": 1.9463825593887064e-05, "loss": 4.018014526367187, "step": 3400 }, { "epoch": 0.02752597208656555, "grad_norm": 1.0128716230392456, "learning_rate": 1.946221012414885e-05, "loss": 3.919170379638672, "step": 3410 }, { "epoch": 0.027606693412332604, "grad_norm": 1.9794273376464844, "learning_rate": 1.946059465441064e-05, "loss": 4.629423522949219, "step": 3420 }, { "epoch": 0.027687414738099658, "grad_norm": 2.0417261123657227, "learning_rate": 1.9458979184672424e-05, "loss": 4.264686584472656, "step": 3430 }, { "epoch": 0.027768136063866713, "grad_norm": 1.1327261924743652, "learning_rate": 1.9457363714934212e-05, "loss": 4.075125122070313, "step": 3440 }, { "epoch": 0.027848857389633767, "grad_norm": 1.7301971912384033, "learning_rate": 1.9455748245195996e-05, "loss": 4.730712127685547, "step": 3450 }, { "epoch": 0.027929578715400822, "grad_norm": 1.430837869644165, "learning_rate": 1.9454132775457787e-05, "loss": 3.9423316955566405, "step": 3460 }, { "epoch": 0.028010300041167877, "grad_norm": 1.1418800354003906, "learning_rate": 1.945251730571957e-05, "loss": 4.891297912597656, "step": 3470 }, { "epoch": 0.02809102136693493, "grad_norm": 1.5035854578018188, "learning_rate": 1.945090183598136e-05, "loss": 4.314469909667968, "step": 3480 }, { "epoch": 0.028171742692701986, "grad_norm": 0.993755042552948, "learning_rate": 1.9449286366243147e-05, "loss": 4.661579132080078, "step": 3490 }, { "epoch": 0.02825246401846904, "grad_norm": 1.1762771606445312, "learning_rate": 1.9447670896504935e-05, "loss": 4.471311569213867, "step": 3500 }, { "epoch": 0.028333185344236095, "grad_norm": 1.2558646202087402, "learning_rate": 1.944605542676672e-05, "loss": 4.572037887573242, "step": 3510 }, { "epoch": 0.02841390667000315, "grad_norm": 1.0951550006866455, "learning_rate": 1.9444439957028507e-05, "loss": 4.457397079467773, "step": 3520 }, { "epoch": 0.028494627995770204, "grad_norm": 1.2312434911727905, "learning_rate": 1.9442824487290294e-05, "loss": 3.8217021942138674, "step": 3530 }, { "epoch": 0.02857534932153726, "grad_norm": 1.3522650003433228, "learning_rate": 1.9441209017552082e-05, "loss": 4.979827880859375, "step": 3540 }, { "epoch": 0.02865607064730431, "grad_norm": 1.3194055557250977, "learning_rate": 1.9439593547813866e-05, "loss": 4.224190139770508, "step": 3550 }, { "epoch": 0.028736791973071364, "grad_norm": 0.9287212491035461, "learning_rate": 1.9437978078075654e-05, "loss": 4.396086883544922, "step": 3560 }, { "epoch": 0.02881751329883842, "grad_norm": 1.5440181493759155, "learning_rate": 1.943636260833744e-05, "loss": 4.522200393676758, "step": 3570 }, { "epoch": 0.028898234624605473, "grad_norm": 1.1111488342285156, "learning_rate": 1.943474713859923e-05, "loss": 4.254176330566406, "step": 3580 }, { "epoch": 0.028978955950372528, "grad_norm": 1.3111894130706787, "learning_rate": 1.9433131668861014e-05, "loss": 4.1912586212158205, "step": 3590 }, { "epoch": 0.029059677276139582, "grad_norm": 1.2546119689941406, "learning_rate": 1.94315161991228e-05, "loss": 4.236508178710937, "step": 3600 }, { "epoch": 0.029140398601906637, "grad_norm": 1.776079773902893, "learning_rate": 1.942990072938459e-05, "loss": 4.105971145629883, "step": 3610 }, { "epoch": 0.02922111992767369, "grad_norm": 0.7897488474845886, "learning_rate": 1.9428285259646377e-05, "loss": 4.599190139770508, "step": 3620 }, { "epoch": 0.029301841253440746, "grad_norm": 1.2347339391708374, "learning_rate": 1.942666978990816e-05, "loss": 3.853728103637695, "step": 3630 }, { "epoch": 0.0293825625792078, "grad_norm": 1.0540499687194824, "learning_rate": 1.942505432016995e-05, "loss": 4.586027526855469, "step": 3640 }, { "epoch": 0.029463283904974855, "grad_norm": 0.967293918132782, "learning_rate": 1.9423438850431736e-05, "loss": 3.423984909057617, "step": 3650 }, { "epoch": 0.02954400523074191, "grad_norm": 1.6360868215560913, "learning_rate": 1.9421823380693524e-05, "loss": 4.931763076782227, "step": 3660 }, { "epoch": 0.029624726556508964, "grad_norm": 1.2976195812225342, "learning_rate": 1.942020791095531e-05, "loss": 3.9787925720214843, "step": 3670 }, { "epoch": 0.02970544788227602, "grad_norm": 1.0691293478012085, "learning_rate": 1.9418592441217096e-05, "loss": 4.711028671264648, "step": 3680 }, { "epoch": 0.029786169208043074, "grad_norm": 0.8172670006752014, "learning_rate": 1.9416976971478884e-05, "loss": 4.3123779296875, "step": 3690 }, { "epoch": 0.029866890533810128, "grad_norm": 1.134608268737793, "learning_rate": 1.941536150174067e-05, "loss": 4.226359558105469, "step": 3700 }, { "epoch": 0.029947611859577183, "grad_norm": 1.369916319847107, "learning_rate": 1.9413746032002456e-05, "loss": 4.302092742919922, "step": 3710 }, { "epoch": 0.030028333185344237, "grad_norm": 1.057639718055725, "learning_rate": 1.9412130562264244e-05, "loss": 4.154216384887695, "step": 3720 }, { "epoch": 0.030109054511111292, "grad_norm": 2.274972438812256, "learning_rate": 1.941051509252603e-05, "loss": 4.405239486694336, "step": 3730 }, { "epoch": 0.030189775836878346, "grad_norm": 0.9478704929351807, "learning_rate": 1.940889962278782e-05, "loss": 4.2814380645751955, "step": 3740 }, { "epoch": 0.0302704971626454, "grad_norm": 0.7811654806137085, "learning_rate": 1.9407284153049603e-05, "loss": 5.017758560180664, "step": 3750 }, { "epoch": 0.030351218488412452, "grad_norm": 1.3796765804290771, "learning_rate": 1.940566868331139e-05, "loss": 4.000130081176758, "step": 3760 }, { "epoch": 0.030431939814179507, "grad_norm": 0.9960059523582458, "learning_rate": 1.940405321357318e-05, "loss": 4.531874465942383, "step": 3770 }, { "epoch": 0.03051266113994656, "grad_norm": 0.924082338809967, "learning_rate": 1.9402437743834966e-05, "loss": 4.227778625488281, "step": 3780 }, { "epoch": 0.030593382465713616, "grad_norm": 1.896472454071045, "learning_rate": 1.940082227409675e-05, "loss": 4.528155517578125, "step": 3790 }, { "epoch": 0.03067410379148067, "grad_norm": 1.092664122581482, "learning_rate": 1.9399206804358538e-05, "loss": 3.9690166473388673, "step": 3800 }, { "epoch": 0.030754825117247725, "grad_norm": 1.3315439224243164, "learning_rate": 1.9397591334620326e-05, "loss": 3.9261688232421874, "step": 3810 }, { "epoch": 0.03083554644301478, "grad_norm": 0.7218206524848938, "learning_rate": 1.9395975864882114e-05, "loss": 3.9737468719482423, "step": 3820 }, { "epoch": 0.030916267768781834, "grad_norm": 1.4526208639144897, "learning_rate": 1.9394360395143898e-05, "loss": 5.082453918457031, "step": 3830 }, { "epoch": 0.03099698909454889, "grad_norm": 1.6770471334457397, "learning_rate": 1.9392744925405686e-05, "loss": 4.189311218261719, "step": 3840 }, { "epoch": 0.031077710420315943, "grad_norm": 1.1131359338760376, "learning_rate": 1.9391129455667473e-05, "loss": 4.455237960815429, "step": 3850 }, { "epoch": 0.031158431746082998, "grad_norm": 0.7263851165771484, "learning_rate": 1.938951398592926e-05, "loss": 4.398097229003906, "step": 3860 }, { "epoch": 0.031239153071850052, "grad_norm": 1.1412291526794434, "learning_rate": 1.9387898516191045e-05, "loss": 4.331891632080078, "step": 3870 }, { "epoch": 0.0313198743976171, "grad_norm": 0.9824315309524536, "learning_rate": 1.9386283046452833e-05, "loss": 3.941035842895508, "step": 3880 }, { "epoch": 0.03140059572338416, "grad_norm": 1.3157410621643066, "learning_rate": 1.938466757671462e-05, "loss": 4.078020477294922, "step": 3890 }, { "epoch": 0.03148131704915121, "grad_norm": 0.8108517527580261, "learning_rate": 1.938305210697641e-05, "loss": 4.229928207397461, "step": 3900 }, { "epoch": 0.03156203837491827, "grad_norm": 0.8173083662986755, "learning_rate": 1.9381436637238193e-05, "loss": 4.281687545776367, "step": 3910 }, { "epoch": 0.03164275970068532, "grad_norm": 1.490738868713379, "learning_rate": 1.937982116749998e-05, "loss": 4.6572517395019535, "step": 3920 }, { "epoch": 0.03172348102645238, "grad_norm": 1.2030012607574463, "learning_rate": 1.9378205697761768e-05, "loss": 4.601653289794922, "step": 3930 }, { "epoch": 0.03180420235221943, "grad_norm": 0.848134458065033, "learning_rate": 1.9376590228023556e-05, "loss": 3.9091724395751952, "step": 3940 }, { "epoch": 0.03188492367798649, "grad_norm": 1.2453960180282593, "learning_rate": 1.937497475828534e-05, "loss": 4.071609878540039, "step": 3950 }, { "epoch": 0.03196564500375354, "grad_norm": 1.2299169301986694, "learning_rate": 1.9373359288547128e-05, "loss": 4.007005310058593, "step": 3960 }, { "epoch": 0.0320463663295206, "grad_norm": 1.0224614143371582, "learning_rate": 1.9371743818808915e-05, "loss": 4.031898498535156, "step": 3970 }, { "epoch": 0.03212708765528765, "grad_norm": 1.4293650388717651, "learning_rate": 1.9370128349070703e-05, "loss": 4.118235397338867, "step": 3980 }, { "epoch": 0.03220780898105471, "grad_norm": 1.2078590393066406, "learning_rate": 1.936851287933249e-05, "loss": 4.4810737609863285, "step": 3990 }, { "epoch": 0.03228853030682176, "grad_norm": 1.2123640775680542, "learning_rate": 1.9366897409594275e-05, "loss": 3.8448314666748047, "step": 4000 }, { "epoch": 0.032369251632588816, "grad_norm": 1.003146767616272, "learning_rate": 1.9365281939856063e-05, "loss": 4.0328819274902346, "step": 4010 }, { "epoch": 0.03244997295835587, "grad_norm": 1.454888939857483, "learning_rate": 1.936366647011785e-05, "loss": 4.3949226379394535, "step": 4020 }, { "epoch": 0.032530694284122925, "grad_norm": 0.9025568962097168, "learning_rate": 1.9362051000379638e-05, "loss": 4.554126739501953, "step": 4030 }, { "epoch": 0.03261141560988998, "grad_norm": 1.196364402770996, "learning_rate": 1.9360435530641423e-05, "loss": 4.14268684387207, "step": 4040 }, { "epoch": 0.032692136935657035, "grad_norm": 1.0129077434539795, "learning_rate": 1.935882006090321e-05, "loss": 4.251414489746094, "step": 4050 }, { "epoch": 0.032772858261424086, "grad_norm": 0.869189977645874, "learning_rate": 1.9357204591164998e-05, "loss": 3.9674888610839845, "step": 4060 }, { "epoch": 0.03285357958719114, "grad_norm": 0.7661462426185608, "learning_rate": 1.9355589121426786e-05, "loss": 3.979930877685547, "step": 4070 }, { "epoch": 0.032934300912958195, "grad_norm": 0.6351110339164734, "learning_rate": 1.935397365168857e-05, "loss": 4.577171325683594, "step": 4080 }, { "epoch": 0.033015022238725246, "grad_norm": 1.1417897939682007, "learning_rate": 1.9352358181950358e-05, "loss": 4.376144027709961, "step": 4090 }, { "epoch": 0.033095743564492304, "grad_norm": 1.7613894939422607, "learning_rate": 1.9350742712212145e-05, "loss": 4.598385620117187, "step": 4100 }, { "epoch": 0.033176464890259355, "grad_norm": 0.8080301880836487, "learning_rate": 1.9349127242473933e-05, "loss": 4.087479019165039, "step": 4110 }, { "epoch": 0.03325718621602641, "grad_norm": 1.0578582286834717, "learning_rate": 1.9347511772735717e-05, "loss": 4.479826354980469, "step": 4120 }, { "epoch": 0.033337907541793464, "grad_norm": 1.14595365524292, "learning_rate": 1.9345896302997505e-05, "loss": 4.3087013244628904, "step": 4130 }, { "epoch": 0.03341862886756052, "grad_norm": 1.3634005784988403, "learning_rate": 1.9344280833259293e-05, "loss": 4.180454254150391, "step": 4140 }, { "epoch": 0.03349935019332757, "grad_norm": 1.2001070976257324, "learning_rate": 1.934266536352108e-05, "loss": 4.303666687011718, "step": 4150 }, { "epoch": 0.03358007151909463, "grad_norm": 0.7286772131919861, "learning_rate": 1.9341049893782865e-05, "loss": 3.7151885986328126, "step": 4160 }, { "epoch": 0.03366079284486168, "grad_norm": 1.0389859676361084, "learning_rate": 1.9339434424044652e-05, "loss": 3.6191333770751952, "step": 4170 }, { "epoch": 0.03374151417062874, "grad_norm": 1.3651437759399414, "learning_rate": 1.933781895430644e-05, "loss": 4.700528335571289, "step": 4180 }, { "epoch": 0.03382223549639579, "grad_norm": 0.9054883122444153, "learning_rate": 1.9336203484568228e-05, "loss": 4.313446044921875, "step": 4190 }, { "epoch": 0.03390295682216285, "grad_norm": 1.3313788175582886, "learning_rate": 1.9334588014830012e-05, "loss": 4.017636489868164, "step": 4200 }, { "epoch": 0.0339836781479299, "grad_norm": 0.9057358503341675, "learning_rate": 1.9332972545091803e-05, "loss": 4.148240280151367, "step": 4210 }, { "epoch": 0.03406439947369696, "grad_norm": 2.5509824752807617, "learning_rate": 1.9331357075353587e-05, "loss": 3.908145523071289, "step": 4220 }, { "epoch": 0.03414512079946401, "grad_norm": 1.3746283054351807, "learning_rate": 1.9329741605615375e-05, "loss": 4.055534744262696, "step": 4230 }, { "epoch": 0.03422584212523107, "grad_norm": 1.1969122886657715, "learning_rate": 1.932812613587716e-05, "loss": 4.297332000732422, "step": 4240 }, { "epoch": 0.03430656345099812, "grad_norm": 2.1060373783111572, "learning_rate": 1.932651066613895e-05, "loss": 4.221706390380859, "step": 4250 }, { "epoch": 0.03438728477676517, "grad_norm": 1.3658573627471924, "learning_rate": 1.9324895196400735e-05, "loss": 4.0743671417236325, "step": 4260 }, { "epoch": 0.03446800610253223, "grad_norm": 1.2644342184066772, "learning_rate": 1.9323279726662523e-05, "loss": 4.134588241577148, "step": 4270 }, { "epoch": 0.03454872742829928, "grad_norm": 1.1593537330627441, "learning_rate": 1.9321664256924307e-05, "loss": 4.415484619140625, "step": 4280 }, { "epoch": 0.03462944875406634, "grad_norm": 2.207176446914673, "learning_rate": 1.9320048787186098e-05, "loss": 4.119513702392578, "step": 4290 }, { "epoch": 0.03471017007983339, "grad_norm": 0.9121759533882141, "learning_rate": 1.9318433317447882e-05, "loss": 4.120559310913086, "step": 4300 }, { "epoch": 0.034790891405600446, "grad_norm": 1.2087671756744385, "learning_rate": 1.931681784770967e-05, "loss": 4.640917205810547, "step": 4310 }, { "epoch": 0.0348716127313675, "grad_norm": 1.4168176651000977, "learning_rate": 1.9315202377971454e-05, "loss": 4.072833251953125, "step": 4320 }, { "epoch": 0.034952334057134556, "grad_norm": 0.7847267389297485, "learning_rate": 1.9313586908233245e-05, "loss": 4.343181228637695, "step": 4330 }, { "epoch": 0.03503305538290161, "grad_norm": 2.1185338497161865, "learning_rate": 1.931197143849503e-05, "loss": 4.367210388183594, "step": 4340 }, { "epoch": 0.035113776708668665, "grad_norm": 1.0671948194503784, "learning_rate": 1.9310355968756817e-05, "loss": 5.098845672607422, "step": 4350 }, { "epoch": 0.035194498034435716, "grad_norm": 0.9551274180412292, "learning_rate": 1.9308740499018605e-05, "loss": 3.8260471343994142, "step": 4360 }, { "epoch": 0.035275219360202774, "grad_norm": 1.1351535320281982, "learning_rate": 1.9307125029280393e-05, "loss": 3.8483551025390623, "step": 4370 }, { "epoch": 0.035355940685969825, "grad_norm": 0.6715970635414124, "learning_rate": 1.9305509559542177e-05, "loss": 4.224506378173828, "step": 4380 }, { "epoch": 0.03543666201173688, "grad_norm": 2.0061821937561035, "learning_rate": 1.9303894089803965e-05, "loss": 3.7159828186035155, "step": 4390 }, { "epoch": 0.035517383337503934, "grad_norm": 2.135892629623413, "learning_rate": 1.9302278620065752e-05, "loss": 4.2576751708984375, "step": 4400 }, { "epoch": 0.03559810466327099, "grad_norm": 1.5156402587890625, "learning_rate": 1.930066315032754e-05, "loss": 4.247966766357422, "step": 4410 }, { "epoch": 0.03567882598903804, "grad_norm": 1.424990177154541, "learning_rate": 1.9299047680589324e-05, "loss": 4.329262161254883, "step": 4420 }, { "epoch": 0.0357595473148051, "grad_norm": 1.0522085428237915, "learning_rate": 1.9297432210851112e-05, "loss": 3.7093055725097654, "step": 4430 }, { "epoch": 0.03584026864057215, "grad_norm": 1.1846590042114258, "learning_rate": 1.92958167411129e-05, "loss": 4.258992004394531, "step": 4440 }, { "epoch": 0.03592098996633921, "grad_norm": 1.1967954635620117, "learning_rate": 1.9294201271374687e-05, "loss": 4.212047958374024, "step": 4450 }, { "epoch": 0.03600171129210626, "grad_norm": 1.5863118171691895, "learning_rate": 1.9292585801636472e-05, "loss": 4.304407119750977, "step": 4460 }, { "epoch": 0.03608243261787331, "grad_norm": 0.7898277640342712, "learning_rate": 1.929097033189826e-05, "loss": 4.381306457519531, "step": 4470 }, { "epoch": 0.03616315394364037, "grad_norm": 0.8019493818283081, "learning_rate": 1.9289354862160047e-05, "loss": 4.090197372436523, "step": 4480 }, { "epoch": 0.03624387526940742, "grad_norm": 1.4644495248794556, "learning_rate": 1.9287739392421835e-05, "loss": 4.560801315307617, "step": 4490 }, { "epoch": 0.03632459659517448, "grad_norm": 0.674584150314331, "learning_rate": 1.928612392268362e-05, "loss": 4.054994583129883, "step": 4500 }, { "epoch": 0.03640531792094153, "grad_norm": 0.6748393774032593, "learning_rate": 1.9284508452945407e-05, "loss": 4.165574264526367, "step": 4510 }, { "epoch": 0.03648603924670859, "grad_norm": 1.074075698852539, "learning_rate": 1.9282892983207195e-05, "loss": 4.128253173828125, "step": 4520 }, { "epoch": 0.03656676057247564, "grad_norm": 0.9617072939872742, "learning_rate": 1.9281277513468982e-05, "loss": 4.272773742675781, "step": 4530 }, { "epoch": 0.0366474818982427, "grad_norm": 0.9436514377593994, "learning_rate": 1.9279662043730767e-05, "loss": 4.446382904052735, "step": 4540 }, { "epoch": 0.03672820322400975, "grad_norm": 0.6509465575218201, "learning_rate": 1.9278046573992554e-05, "loss": 4.157065963745117, "step": 4550 }, { "epoch": 0.03680892454977681, "grad_norm": 0.8939435482025146, "learning_rate": 1.9276431104254342e-05, "loss": 4.148159790039062, "step": 4560 }, { "epoch": 0.03688964587554386, "grad_norm": 1.3814424276351929, "learning_rate": 1.927481563451613e-05, "loss": 4.450914001464843, "step": 4570 }, { "epoch": 0.036970367201310916, "grad_norm": 0.9343910217285156, "learning_rate": 1.9273200164777914e-05, "loss": 3.5744911193847657, "step": 4580 }, { "epoch": 0.03705108852707797, "grad_norm": 0.9173043370246887, "learning_rate": 1.92715846950397e-05, "loss": 3.9256092071533204, "step": 4590 }, { "epoch": 0.037131809852845025, "grad_norm": 0.8665229678153992, "learning_rate": 1.926996922530149e-05, "loss": 3.815196990966797, "step": 4600 }, { "epoch": 0.03721253117861208, "grad_norm": 0.8925291299819946, "learning_rate": 1.9268353755563277e-05, "loss": 4.852031707763672, "step": 4610 }, { "epoch": 0.037293252504379135, "grad_norm": 1.6400132179260254, "learning_rate": 1.926673828582506e-05, "loss": 4.632526779174805, "step": 4620 }, { "epoch": 0.037373973830146186, "grad_norm": 0.942594051361084, "learning_rate": 1.926512281608685e-05, "loss": 4.751037979125977, "step": 4630 }, { "epoch": 0.037454695155913244, "grad_norm": 1.2548611164093018, "learning_rate": 1.9263507346348637e-05, "loss": 3.6129596710205076, "step": 4640 }, { "epoch": 0.037535416481680295, "grad_norm": 0.8059343695640564, "learning_rate": 1.9261891876610424e-05, "loss": 4.238493347167969, "step": 4650 }, { "epoch": 0.03761613780744735, "grad_norm": 1.2073581218719482, "learning_rate": 1.926027640687221e-05, "loss": 3.8737815856933593, "step": 4660 }, { "epoch": 0.037696859133214404, "grad_norm": 0.7249970436096191, "learning_rate": 1.9258660937133996e-05, "loss": 4.082271575927734, "step": 4670 }, { "epoch": 0.037777580458981455, "grad_norm": 0.8260923027992249, "learning_rate": 1.9257045467395784e-05, "loss": 4.18199348449707, "step": 4680 }, { "epoch": 0.03785830178474851, "grad_norm": 1.3545283079147339, "learning_rate": 1.9255429997657572e-05, "loss": 3.9032451629638674, "step": 4690 }, { "epoch": 0.037939023110515564, "grad_norm": 0.9728230834007263, "learning_rate": 1.9253814527919356e-05, "loss": 4.349707794189453, "step": 4700 }, { "epoch": 0.03801974443628262, "grad_norm": 1.1879853010177612, "learning_rate": 1.9252199058181144e-05, "loss": 4.729844665527343, "step": 4710 }, { "epoch": 0.03810046576204967, "grad_norm": 0.9515864849090576, "learning_rate": 1.925058358844293e-05, "loss": 3.938319778442383, "step": 4720 }, { "epoch": 0.03818118708781673, "grad_norm": 1.4984008073806763, "learning_rate": 1.924896811870472e-05, "loss": 3.7671226501464843, "step": 4730 }, { "epoch": 0.03826190841358378, "grad_norm": 0.9582381248474121, "learning_rate": 1.9247352648966503e-05, "loss": 3.9906166076660154, "step": 4740 }, { "epoch": 0.03834262973935084, "grad_norm": 1.0892877578735352, "learning_rate": 1.924573717922829e-05, "loss": 4.975772476196289, "step": 4750 }, { "epoch": 0.03842335106511789, "grad_norm": 0.9666255712509155, "learning_rate": 1.924412170949008e-05, "loss": 4.022412872314453, "step": 4760 }, { "epoch": 0.03850407239088495, "grad_norm": 0.6022693514823914, "learning_rate": 1.9242506239751867e-05, "loss": 3.6356533050537108, "step": 4770 }, { "epoch": 0.038584793716652, "grad_norm": 0.803378701210022, "learning_rate": 1.924089077001365e-05, "loss": 4.343573379516601, "step": 4780 }, { "epoch": 0.03866551504241906, "grad_norm": 1.1421927213668823, "learning_rate": 1.923927530027544e-05, "loss": 4.032664108276367, "step": 4790 }, { "epoch": 0.03874623636818611, "grad_norm": 1.6517492532730103, "learning_rate": 1.9237659830537226e-05, "loss": 4.761684799194336, "step": 4800 }, { "epoch": 0.03882695769395317, "grad_norm": 1.2219352722167969, "learning_rate": 1.9236044360799014e-05, "loss": 4.445058059692383, "step": 4810 }, { "epoch": 0.03890767901972022, "grad_norm": 0.6833744645118713, "learning_rate": 1.9234428891060798e-05, "loss": 4.229453277587891, "step": 4820 }, { "epoch": 0.03898840034548728, "grad_norm": 1.3921006917953491, "learning_rate": 1.9232813421322586e-05, "loss": 4.762325286865234, "step": 4830 }, { "epoch": 0.03906912167125433, "grad_norm": 0.9709562063217163, "learning_rate": 1.9231197951584374e-05, "loss": 3.622407150268555, "step": 4840 }, { "epoch": 0.039149842997021386, "grad_norm": 1.5092885494232178, "learning_rate": 1.922958248184616e-05, "loss": 4.497683715820313, "step": 4850 }, { "epoch": 0.03923056432278844, "grad_norm": 0.8261246681213379, "learning_rate": 1.9227967012107946e-05, "loss": 3.5552955627441407, "step": 4860 }, { "epoch": 0.03931128564855549, "grad_norm": 2.256720781326294, "learning_rate": 1.9226351542369733e-05, "loss": 4.125643920898438, "step": 4870 }, { "epoch": 0.039392006974322546, "grad_norm": 0.8892691731452942, "learning_rate": 1.922473607263152e-05, "loss": 4.414391708374024, "step": 4880 }, { "epoch": 0.0394727283000896, "grad_norm": 1.412881851196289, "learning_rate": 1.922312060289331e-05, "loss": 4.200001907348633, "step": 4890 }, { "epoch": 0.039553449625856656, "grad_norm": 1.2550503015518188, "learning_rate": 1.9221505133155093e-05, "loss": 4.702828979492187, "step": 4900 }, { "epoch": 0.03963417095162371, "grad_norm": 1.8890918493270874, "learning_rate": 1.921988966341688e-05, "loss": 3.611167144775391, "step": 4910 }, { "epoch": 0.039714892277390765, "grad_norm": 1.4724419116973877, "learning_rate": 1.921827419367867e-05, "loss": 4.3413848876953125, "step": 4920 }, { "epoch": 0.039795613603157816, "grad_norm": 0.7915105223655701, "learning_rate": 1.9216658723940456e-05, "loss": 4.19413948059082, "step": 4930 }, { "epoch": 0.039876334928924874, "grad_norm": 0.9996341466903687, "learning_rate": 1.921504325420224e-05, "loss": 4.301041793823242, "step": 4940 }, { "epoch": 0.039957056254691925, "grad_norm": 1.1935316324234009, "learning_rate": 1.9213427784464028e-05, "loss": 4.337735748291015, "step": 4950 }, { "epoch": 0.04003777758045898, "grad_norm": 0.8473761081695557, "learning_rate": 1.9211812314725816e-05, "loss": 4.205547332763672, "step": 4960 }, { "epoch": 0.040118498906226034, "grad_norm": 0.8916776180267334, "learning_rate": 1.9210196844987603e-05, "loss": 3.7458457946777344, "step": 4970 }, { "epoch": 0.04019922023199309, "grad_norm": 1.3832401037216187, "learning_rate": 1.9208581375249388e-05, "loss": 3.9835861206054686, "step": 4980 }, { "epoch": 0.04027994155776014, "grad_norm": 1.3916569948196411, "learning_rate": 1.9206965905511175e-05, "loss": 4.331658935546875, "step": 4990 }, { "epoch": 0.0403606628835272, "grad_norm": 1.459805965423584, "learning_rate": 1.9205350435772963e-05, "loss": 4.198551177978516, "step": 5000 }, { "epoch": 0.04044138420929425, "grad_norm": 0.9682134985923767, "learning_rate": 1.920373496603475e-05, "loss": 3.8302661895751955, "step": 5010 }, { "epoch": 0.04052210553506131, "grad_norm": 0.8671865463256836, "learning_rate": 1.9202119496296535e-05, "loss": 3.8196338653564452, "step": 5020 }, { "epoch": 0.04060282686082836, "grad_norm": 1.3289881944656372, "learning_rate": 1.9200504026558323e-05, "loss": 3.972971725463867, "step": 5030 }, { "epoch": 0.04068354818659542, "grad_norm": 1.004990816116333, "learning_rate": 1.919888855682011e-05, "loss": 4.2178184509277346, "step": 5040 }, { "epoch": 0.04076426951236247, "grad_norm": 1.4057170152664185, "learning_rate": 1.9197273087081898e-05, "loss": 3.8450344085693358, "step": 5050 }, { "epoch": 0.04084499083812953, "grad_norm": 1.0647717714309692, "learning_rate": 1.9195657617343683e-05, "loss": 4.162871551513672, "step": 5060 }, { "epoch": 0.04092571216389658, "grad_norm": 0.8239194750785828, "learning_rate": 1.919404214760547e-05, "loss": 3.757210540771484, "step": 5070 }, { "epoch": 0.04100643348966363, "grad_norm": 1.3128985166549683, "learning_rate": 1.9192426677867258e-05, "loss": 3.8780059814453125, "step": 5080 }, { "epoch": 0.04108715481543069, "grad_norm": 2.1348795890808105, "learning_rate": 1.9190811208129046e-05, "loss": 4.200281143188477, "step": 5090 }, { "epoch": 0.04116787614119774, "grad_norm": 2.325758695602417, "learning_rate": 1.918919573839083e-05, "loss": 3.948551559448242, "step": 5100 }, { "epoch": 0.0412485974669648, "grad_norm": 0.5688397884368896, "learning_rate": 1.9187580268652618e-05, "loss": 4.429008102416992, "step": 5110 }, { "epoch": 0.04132931879273185, "grad_norm": 0.9356141686439514, "learning_rate": 1.9185964798914405e-05, "loss": 4.171221923828125, "step": 5120 }, { "epoch": 0.04141004011849891, "grad_norm": 1.2397905588150024, "learning_rate": 1.9184349329176193e-05, "loss": 4.464538192749023, "step": 5130 }, { "epoch": 0.04149076144426596, "grad_norm": 3.198955774307251, "learning_rate": 1.9182733859437977e-05, "loss": 3.929977035522461, "step": 5140 }, { "epoch": 0.041571482770033016, "grad_norm": 1.0946964025497437, "learning_rate": 1.9181118389699765e-05, "loss": 4.185892105102539, "step": 5150 }, { "epoch": 0.04165220409580007, "grad_norm": 1.486323595046997, "learning_rate": 1.9179502919961553e-05, "loss": 3.7925064086914064, "step": 5160 }, { "epoch": 0.041732925421567126, "grad_norm": 1.1650102138519287, "learning_rate": 1.917788745022334e-05, "loss": 4.048864364624023, "step": 5170 }, { "epoch": 0.04181364674733418, "grad_norm": 0.8527909517288208, "learning_rate": 1.9176271980485125e-05, "loss": 3.6684806823730467, "step": 5180 }, { "epoch": 0.041894368073101235, "grad_norm": 1.5118789672851562, "learning_rate": 1.9174656510746912e-05, "loss": 3.750540542602539, "step": 5190 }, { "epoch": 0.041975089398868286, "grad_norm": 1.6808425188064575, "learning_rate": 1.91730410410087e-05, "loss": 3.707319641113281, "step": 5200 }, { "epoch": 0.042055810724635344, "grad_norm": 1.1960698366165161, "learning_rate": 1.9171425571270488e-05, "loss": 4.421387100219727, "step": 5210 }, { "epoch": 0.042136532050402395, "grad_norm": 1.4742306470870972, "learning_rate": 1.9169810101532272e-05, "loss": 4.583295822143555, "step": 5220 }, { "epoch": 0.04221725337616945, "grad_norm": 1.0240085124969482, "learning_rate": 1.9168194631794063e-05, "loss": 4.060422134399414, "step": 5230 }, { "epoch": 0.042297974701936504, "grad_norm": 1.1885451078414917, "learning_rate": 1.916657916205585e-05, "loss": 3.8186573028564452, "step": 5240 }, { "epoch": 0.04237869602770356, "grad_norm": 0.7262006998062134, "learning_rate": 1.9164963692317635e-05, "loss": 3.525778961181641, "step": 5250 }, { "epoch": 0.04245941735347061, "grad_norm": 1.8235667943954468, "learning_rate": 1.9163348222579423e-05, "loss": 4.036094665527344, "step": 5260 }, { "epoch": 0.04254013867923767, "grad_norm": 1.0556801557540894, "learning_rate": 1.916173275284121e-05, "loss": 5.021086883544922, "step": 5270 }, { "epoch": 0.04262086000500472, "grad_norm": 1.0134714841842651, "learning_rate": 1.9160117283102998e-05, "loss": 4.157197952270508, "step": 5280 }, { "epoch": 0.04270158133077177, "grad_norm": 0.8538205027580261, "learning_rate": 1.9158501813364783e-05, "loss": 3.735817718505859, "step": 5290 }, { "epoch": 0.04278230265653883, "grad_norm": 1.5957881212234497, "learning_rate": 1.915688634362657e-05, "loss": 3.765514373779297, "step": 5300 }, { "epoch": 0.04286302398230588, "grad_norm": 1.1164336204528809, "learning_rate": 1.9155270873888358e-05, "loss": 4.169794082641602, "step": 5310 }, { "epoch": 0.04294374530807294, "grad_norm": 1.2888332605361938, "learning_rate": 1.9153655404150146e-05, "loss": 4.214879989624023, "step": 5320 }, { "epoch": 0.04302446663383999, "grad_norm": 0.8516678214073181, "learning_rate": 1.915203993441193e-05, "loss": 3.9896224975585937, "step": 5330 }, { "epoch": 0.04310518795960705, "grad_norm": 0.7956323027610779, "learning_rate": 1.9150424464673718e-05, "loss": 3.688436508178711, "step": 5340 }, { "epoch": 0.0431859092853741, "grad_norm": 1.6798492670059204, "learning_rate": 1.9148808994935505e-05, "loss": 3.9906211853027345, "step": 5350 }, { "epoch": 0.04326663061114116, "grad_norm": 1.0246738195419312, "learning_rate": 1.9147193525197293e-05, "loss": 3.579421615600586, "step": 5360 }, { "epoch": 0.04334735193690821, "grad_norm": 1.1617497205734253, "learning_rate": 1.9145578055459077e-05, "loss": 4.325425338745117, "step": 5370 }, { "epoch": 0.04342807326267527, "grad_norm": 1.4083023071289062, "learning_rate": 1.9143962585720865e-05, "loss": 4.162380218505859, "step": 5380 }, { "epoch": 0.04350879458844232, "grad_norm": 1.3433021306991577, "learning_rate": 1.9142347115982653e-05, "loss": 4.616386795043946, "step": 5390 }, { "epoch": 0.04358951591420938, "grad_norm": 1.5019232034683228, "learning_rate": 1.914073164624444e-05, "loss": 4.003988265991211, "step": 5400 }, { "epoch": 0.04367023723997643, "grad_norm": 1.5149961709976196, "learning_rate": 1.9139116176506225e-05, "loss": 4.798352432250977, "step": 5410 }, { "epoch": 0.043750958565743486, "grad_norm": 0.9946447014808655, "learning_rate": 1.9137500706768012e-05, "loss": 4.177203369140625, "step": 5420 }, { "epoch": 0.04383167989151054, "grad_norm": 0.844789981842041, "learning_rate": 1.91358852370298e-05, "loss": 3.9293521881103515, "step": 5430 }, { "epoch": 0.043912401217277595, "grad_norm": 0.9283713698387146, "learning_rate": 1.9134269767291588e-05, "loss": 3.852497100830078, "step": 5440 }, { "epoch": 0.04399312254304465, "grad_norm": 0.6623693704605103, "learning_rate": 1.9132654297553372e-05, "loss": 4.091447830200195, "step": 5450 }, { "epoch": 0.044073843868811705, "grad_norm": 1.4181435108184814, "learning_rate": 1.913103882781516e-05, "loss": 4.168763732910156, "step": 5460 }, { "epoch": 0.044154565194578756, "grad_norm": 1.0324381589889526, "learning_rate": 1.9129423358076947e-05, "loss": 3.7483840942382813, "step": 5470 }, { "epoch": 0.04423528652034581, "grad_norm": 0.9585906267166138, "learning_rate": 1.9127807888338735e-05, "loss": 3.9038352966308594, "step": 5480 }, { "epoch": 0.044316007846112865, "grad_norm": 0.8372941613197327, "learning_rate": 1.912619241860052e-05, "loss": 4.063885879516602, "step": 5490 }, { "epoch": 0.044396729171879916, "grad_norm": 0.5987200140953064, "learning_rate": 1.9124576948862307e-05, "loss": 4.434486007690429, "step": 5500 }, { "epoch": 0.044477450497646974, "grad_norm": 0.852415919303894, "learning_rate": 1.9122961479124095e-05, "loss": 3.7098175048828126, "step": 5510 }, { "epoch": 0.044558171823414025, "grad_norm": 0.979500949382782, "learning_rate": 1.9121346009385883e-05, "loss": 4.162363433837891, "step": 5520 }, { "epoch": 0.04463889314918108, "grad_norm": 1.2809529304504395, "learning_rate": 1.9119730539647667e-05, "loss": 4.168955230712891, "step": 5530 }, { "epoch": 0.044719614474948134, "grad_norm": 1.2895084619522095, "learning_rate": 1.9118115069909455e-05, "loss": 4.006018829345703, "step": 5540 }, { "epoch": 0.04480033580071519, "grad_norm": 0.9115367531776428, "learning_rate": 1.9116499600171242e-05, "loss": 4.813491439819336, "step": 5550 }, { "epoch": 0.04488105712648224, "grad_norm": 1.1569675207138062, "learning_rate": 1.911488413043303e-05, "loss": 3.887179946899414, "step": 5560 }, { "epoch": 0.0449617784522493, "grad_norm": 0.8607803583145142, "learning_rate": 1.9113268660694814e-05, "loss": 4.264207077026367, "step": 5570 }, { "epoch": 0.04504249977801635, "grad_norm": 1.0448269844055176, "learning_rate": 1.9111653190956602e-05, "loss": 4.161569976806641, "step": 5580 }, { "epoch": 0.04512322110378341, "grad_norm": 0.6849669814109802, "learning_rate": 1.911003772121839e-05, "loss": 4.545206832885742, "step": 5590 }, { "epoch": 0.04520394242955046, "grad_norm": 1.435592532157898, "learning_rate": 1.9108422251480177e-05, "loss": 4.185760498046875, "step": 5600 }, { "epoch": 0.04528466375531752, "grad_norm": 0.9244351387023926, "learning_rate": 1.910680678174196e-05, "loss": 4.234328842163086, "step": 5610 }, { "epoch": 0.04536538508108457, "grad_norm": 1.3566993474960327, "learning_rate": 1.910519131200375e-05, "loss": 3.688508987426758, "step": 5620 }, { "epoch": 0.04544610640685163, "grad_norm": 1.1979691982269287, "learning_rate": 1.9103575842265537e-05, "loss": 4.073102951049805, "step": 5630 }, { "epoch": 0.04552682773261868, "grad_norm": 1.5290480852127075, "learning_rate": 1.9101960372527325e-05, "loss": 3.6690673828125, "step": 5640 }, { "epoch": 0.04560754905838574, "grad_norm": 0.806842565536499, "learning_rate": 1.910034490278911e-05, "loss": 4.304795837402343, "step": 5650 }, { "epoch": 0.04568827038415279, "grad_norm": 1.5269551277160645, "learning_rate": 1.9098729433050897e-05, "loss": 3.49088134765625, "step": 5660 }, { "epoch": 0.04576899170991985, "grad_norm": 1.2933313846588135, "learning_rate": 1.9097113963312684e-05, "loss": 4.448709869384766, "step": 5670 }, { "epoch": 0.0458497130356869, "grad_norm": 1.1670160293579102, "learning_rate": 1.9095498493574472e-05, "loss": 3.6480335235595702, "step": 5680 }, { "epoch": 0.04593043436145395, "grad_norm": 0.8920729160308838, "learning_rate": 1.9093883023836256e-05, "loss": 4.140672302246093, "step": 5690 }, { "epoch": 0.04601115568722101, "grad_norm": 0.898787796497345, "learning_rate": 1.9092267554098044e-05, "loss": 4.205354309082031, "step": 5700 }, { "epoch": 0.04609187701298806, "grad_norm": 0.7649586796760559, "learning_rate": 1.9090652084359832e-05, "loss": 4.221016693115234, "step": 5710 }, { "epoch": 0.046172598338755116, "grad_norm": 1.234334111213684, "learning_rate": 1.908903661462162e-05, "loss": 4.151939010620117, "step": 5720 }, { "epoch": 0.04625331966452217, "grad_norm": 1.1396968364715576, "learning_rate": 1.9087421144883404e-05, "loss": 4.122473526000976, "step": 5730 }, { "epoch": 0.046334040990289226, "grad_norm": 1.4093493223190308, "learning_rate": 1.908580567514519e-05, "loss": 3.674749755859375, "step": 5740 }, { "epoch": 0.04641476231605628, "grad_norm": 0.8582451939582825, "learning_rate": 1.908419020540698e-05, "loss": 3.5276462554931642, "step": 5750 }, { "epoch": 0.046495483641823335, "grad_norm": 1.3917633295059204, "learning_rate": 1.9082574735668767e-05, "loss": 3.555957794189453, "step": 5760 }, { "epoch": 0.046576204967590386, "grad_norm": 0.7995463609695435, "learning_rate": 1.908095926593055e-05, "loss": 4.573201751708984, "step": 5770 }, { "epoch": 0.046656926293357444, "grad_norm": 1.2856320142745972, "learning_rate": 1.907934379619234e-05, "loss": 4.306854248046875, "step": 5780 }, { "epoch": 0.046737647619124495, "grad_norm": 1.0515446662902832, "learning_rate": 1.9077728326454126e-05, "loss": 4.460754013061523, "step": 5790 }, { "epoch": 0.04681836894489155, "grad_norm": 1.2091885805130005, "learning_rate": 1.9076112856715914e-05, "loss": 3.980645751953125, "step": 5800 }, { "epoch": 0.046899090270658604, "grad_norm": 1.3868029117584229, "learning_rate": 1.90744973869777e-05, "loss": 3.6044158935546875, "step": 5810 }, { "epoch": 0.04697981159642566, "grad_norm": 1.3830235004425049, "learning_rate": 1.9072881917239486e-05, "loss": 3.7806507110595704, "step": 5820 }, { "epoch": 0.04706053292219271, "grad_norm": 1.3184432983398438, "learning_rate": 1.9071266447501274e-05, "loss": 4.3821861267089846, "step": 5830 }, { "epoch": 0.04714125424795977, "grad_norm": 1.267398715019226, "learning_rate": 1.906965097776306e-05, "loss": 3.6490550994873048, "step": 5840 }, { "epoch": 0.04722197557372682, "grad_norm": 1.1581635475158691, "learning_rate": 1.9068035508024846e-05, "loss": 3.655731201171875, "step": 5850 }, { "epoch": 0.04730269689949388, "grad_norm": 1.3043569326400757, "learning_rate": 1.9066420038286634e-05, "loss": 4.004937362670899, "step": 5860 }, { "epoch": 0.04738341822526093, "grad_norm": 0.7337250113487244, "learning_rate": 1.906480456854842e-05, "loss": 4.095263290405273, "step": 5870 }, { "epoch": 0.04746413955102799, "grad_norm": 0.9479101300239563, "learning_rate": 1.906318909881021e-05, "loss": 3.978697967529297, "step": 5880 }, { "epoch": 0.04754486087679504, "grad_norm": 0.9733469486236572, "learning_rate": 1.9061573629071993e-05, "loss": 4.443878555297852, "step": 5890 }, { "epoch": 0.04762558220256209, "grad_norm": 0.6921318769454956, "learning_rate": 1.905995815933378e-05, "loss": 3.644179916381836, "step": 5900 }, { "epoch": 0.04770630352832915, "grad_norm": 1.8560758829116821, "learning_rate": 1.905834268959557e-05, "loss": 4.106679153442383, "step": 5910 }, { "epoch": 0.0477870248540962, "grad_norm": 0.7568206787109375, "learning_rate": 1.9056727219857356e-05, "loss": 4.238958740234375, "step": 5920 }, { "epoch": 0.04786774617986326, "grad_norm": 0.906342089176178, "learning_rate": 1.905511175011914e-05, "loss": 3.5326702117919924, "step": 5930 }, { "epoch": 0.04794846750563031, "grad_norm": 0.8412553668022156, "learning_rate": 1.905349628038093e-05, "loss": 4.210332870483398, "step": 5940 }, { "epoch": 0.04802918883139737, "grad_norm": 1.4643033742904663, "learning_rate": 1.9051880810642716e-05, "loss": 3.8647884368896483, "step": 5950 }, { "epoch": 0.04810991015716442, "grad_norm": 1.8518877029418945, "learning_rate": 1.9050265340904504e-05, "loss": 3.6207355499267577, "step": 5960 }, { "epoch": 0.04819063148293148, "grad_norm": 1.4118372201919556, "learning_rate": 1.9048649871166288e-05, "loss": 3.7496509552001953, "step": 5970 }, { "epoch": 0.04827135280869853, "grad_norm": 0.9324256777763367, "learning_rate": 1.9047034401428076e-05, "loss": 3.627254867553711, "step": 5980 }, { "epoch": 0.048352074134465586, "grad_norm": 0.7227241396903992, "learning_rate": 1.9045418931689863e-05, "loss": 3.583795928955078, "step": 5990 }, { "epoch": 0.04843279546023264, "grad_norm": 2.7846696376800537, "learning_rate": 1.904380346195165e-05, "loss": 4.0264934539794925, "step": 6000 }, { "epoch": 0.048513516785999695, "grad_norm": 1.1664056777954102, "learning_rate": 1.9042187992213435e-05, "loss": 3.9415283203125, "step": 6010 }, { "epoch": 0.04859423811176675, "grad_norm": 1.6544018983840942, "learning_rate": 1.9040572522475223e-05, "loss": 3.9040496826171873, "step": 6020 }, { "epoch": 0.048674959437533805, "grad_norm": 6.293285846710205, "learning_rate": 1.903895705273701e-05, "loss": 4.594560623168945, "step": 6030 }, { "epoch": 0.048755680763300856, "grad_norm": 1.126058578491211, "learning_rate": 1.90373415829988e-05, "loss": 3.9040287017822264, "step": 6040 }, { "epoch": 0.048836402089067914, "grad_norm": 0.9170766472816467, "learning_rate": 1.9035726113260583e-05, "loss": 3.780759811401367, "step": 6050 }, { "epoch": 0.048917123414834965, "grad_norm": 0.7595223188400269, "learning_rate": 1.903411064352237e-05, "loss": 3.602486801147461, "step": 6060 }, { "epoch": 0.04899784474060202, "grad_norm": 1.1282919645309448, "learning_rate": 1.9032495173784158e-05, "loss": 4.470493316650391, "step": 6070 }, { "epoch": 0.049078566066369074, "grad_norm": 0.7810491919517517, "learning_rate": 1.9030879704045946e-05, "loss": 4.153773498535156, "step": 6080 }, { "epoch": 0.049159287392136125, "grad_norm": 0.9893436431884766, "learning_rate": 1.902926423430773e-05, "loss": 3.8732452392578125, "step": 6090 }, { "epoch": 0.04924000871790318, "grad_norm": 0.7720766663551331, "learning_rate": 1.902764876456952e-05, "loss": 4.015435028076172, "step": 6100 }, { "epoch": 0.049320730043670234, "grad_norm": 1.9018884897232056, "learning_rate": 1.9026033294831306e-05, "loss": 4.045674514770508, "step": 6110 }, { "epoch": 0.04940145136943729, "grad_norm": 1.1263206005096436, "learning_rate": 1.9024417825093093e-05, "loss": 4.006409454345703, "step": 6120 }, { "epoch": 0.04948217269520434, "grad_norm": 1.2903450727462769, "learning_rate": 1.9022802355354878e-05, "loss": 4.18851318359375, "step": 6130 }, { "epoch": 0.0495628940209714, "grad_norm": 1.0385918617248535, "learning_rate": 1.902118688561667e-05, "loss": 3.7207359313964843, "step": 6140 }, { "epoch": 0.04964361534673845, "grad_norm": 1.3576380014419556, "learning_rate": 1.9019571415878453e-05, "loss": 4.117983627319336, "step": 6150 }, { "epoch": 0.04972433667250551, "grad_norm": 1.1138933897018433, "learning_rate": 1.901795594614024e-05, "loss": 3.696773147583008, "step": 6160 }, { "epoch": 0.04980505799827256, "grad_norm": 1.6339868307113647, "learning_rate": 1.9016340476402025e-05, "loss": 3.6214176177978517, "step": 6170 }, { "epoch": 0.04988577932403962, "grad_norm": 1.588964819908142, "learning_rate": 1.9014725006663816e-05, "loss": 4.223176193237305, "step": 6180 }, { "epoch": 0.04996650064980667, "grad_norm": 0.5523331761360168, "learning_rate": 1.90131095369256e-05, "loss": 3.6767444610595703, "step": 6190 }, { "epoch": 0.05004722197557373, "grad_norm": 1.7451343536376953, "learning_rate": 1.9011494067187388e-05, "loss": 3.9525184631347656, "step": 6200 }, { "epoch": 0.05012794330134078, "grad_norm": 1.089432954788208, "learning_rate": 1.9009878597449172e-05, "loss": 3.7325180053710936, "step": 6210 }, { "epoch": 0.05020866462710784, "grad_norm": 1.3075957298278809, "learning_rate": 1.9008263127710963e-05, "loss": 4.3377025604248045, "step": 6220 }, { "epoch": 0.05028938595287489, "grad_norm": 1.0671271085739136, "learning_rate": 1.9006647657972748e-05, "loss": 3.9305957794189452, "step": 6230 }, { "epoch": 0.05037010727864195, "grad_norm": 1.5393562316894531, "learning_rate": 1.9005032188234535e-05, "loss": 4.288554000854492, "step": 6240 }, { "epoch": 0.050450828604409, "grad_norm": 1.294716477394104, "learning_rate": 1.9003416718496323e-05, "loss": 3.8598373413085936, "step": 6250 }, { "epoch": 0.050531549930176056, "grad_norm": 0.9417953491210938, "learning_rate": 1.900180124875811e-05, "loss": 3.656443786621094, "step": 6260 }, { "epoch": 0.05061227125594311, "grad_norm": 1.4389121532440186, "learning_rate": 1.9000185779019895e-05, "loss": 4.14807243347168, "step": 6270 }, { "epoch": 0.050692992581710165, "grad_norm": 4.598487377166748, "learning_rate": 1.8998570309281683e-05, "loss": 3.975421905517578, "step": 6280 }, { "epoch": 0.050773713907477216, "grad_norm": 0.8044185638427734, "learning_rate": 1.899695483954347e-05, "loss": 3.9533157348632812, "step": 6290 }, { "epoch": 0.05085443523324427, "grad_norm": 1.0549228191375732, "learning_rate": 1.8995339369805258e-05, "loss": 4.009024810791016, "step": 6300 }, { "epoch": 0.050935156559011326, "grad_norm": 1.331337571144104, "learning_rate": 1.8993723900067042e-05, "loss": 4.138969421386719, "step": 6310 }, { "epoch": 0.05101587788477838, "grad_norm": 0.9674131274223328, "learning_rate": 1.899210843032883e-05, "loss": 3.527405548095703, "step": 6320 }, { "epoch": 0.051096599210545435, "grad_norm": 0.9760224223136902, "learning_rate": 1.8990492960590618e-05, "loss": 4.16094741821289, "step": 6330 }, { "epoch": 0.051177320536312486, "grad_norm": 0.8731797933578491, "learning_rate": 1.8988877490852406e-05, "loss": 3.898091506958008, "step": 6340 }, { "epoch": 0.051258041862079544, "grad_norm": 1.0186444520950317, "learning_rate": 1.898726202111419e-05, "loss": 3.8052906036376952, "step": 6350 }, { "epoch": 0.051338763187846595, "grad_norm": 0.922085702419281, "learning_rate": 1.8985646551375978e-05, "loss": 4.0799610137939455, "step": 6360 }, { "epoch": 0.05141948451361365, "grad_norm": 0.8557979464530945, "learning_rate": 1.8984031081637765e-05, "loss": 3.7797386169433596, "step": 6370 }, { "epoch": 0.051500205839380704, "grad_norm": 0.9956483840942383, "learning_rate": 1.8982415611899553e-05, "loss": 3.8169715881347654, "step": 6380 }, { "epoch": 0.05158092716514776, "grad_norm": 1.2463408708572388, "learning_rate": 1.8980800142161337e-05, "loss": 4.035663223266601, "step": 6390 }, { "epoch": 0.05166164849091481, "grad_norm": 0.7291328310966492, "learning_rate": 1.8979184672423125e-05, "loss": 3.966929244995117, "step": 6400 }, { "epoch": 0.05174236981668187, "grad_norm": 1.1941512823104858, "learning_rate": 1.8977569202684913e-05, "loss": 3.9156444549560545, "step": 6410 }, { "epoch": 0.05182309114244892, "grad_norm": 1.7476632595062256, "learning_rate": 1.89759537329467e-05, "loss": 3.8910873413085936, "step": 6420 }, { "epoch": 0.05190381246821598, "grad_norm": 1.0804413557052612, "learning_rate": 1.8974338263208485e-05, "loss": 3.5882625579833984, "step": 6430 }, { "epoch": 0.05198453379398303, "grad_norm": 1.1398413181304932, "learning_rate": 1.8972722793470272e-05, "loss": 3.5454471588134764, "step": 6440 }, { "epoch": 0.05206525511975009, "grad_norm": 1.4172462224960327, "learning_rate": 1.897110732373206e-05, "loss": 3.6201812744140627, "step": 6450 }, { "epoch": 0.05214597644551714, "grad_norm": 0.8094664216041565, "learning_rate": 1.8969491853993848e-05, "loss": 4.218225479125977, "step": 6460 }, { "epoch": 0.0522266977712842, "grad_norm": 0.9746145606040955, "learning_rate": 1.8967876384255632e-05, "loss": 3.7773548126220704, "step": 6470 }, { "epoch": 0.05230741909705125, "grad_norm": 1.2583811283111572, "learning_rate": 1.896626091451742e-05, "loss": 4.46471061706543, "step": 6480 }, { "epoch": 0.0523881404228183, "grad_norm": 1.2134106159210205, "learning_rate": 1.8964645444779207e-05, "loss": 3.9340892791748048, "step": 6490 }, { "epoch": 0.05246886174858536, "grad_norm": 1.5746454000473022, "learning_rate": 1.8963029975040995e-05, "loss": 3.8547019958496094, "step": 6500 }, { "epoch": 0.05254958307435241, "grad_norm": 1.2471667528152466, "learning_rate": 1.8961414505302783e-05, "loss": 4.34150390625, "step": 6510 }, { "epoch": 0.05263030440011947, "grad_norm": 3.2697765827178955, "learning_rate": 1.8959799035564567e-05, "loss": 4.227616500854492, "step": 6520 }, { "epoch": 0.05271102572588652, "grad_norm": 1.122043251991272, "learning_rate": 1.8958183565826355e-05, "loss": 3.366648483276367, "step": 6530 }, { "epoch": 0.05279174705165358, "grad_norm": 0.9623958468437195, "learning_rate": 1.8956568096088142e-05, "loss": 3.7100154876708986, "step": 6540 }, { "epoch": 0.05287246837742063, "grad_norm": 0.9893490076065063, "learning_rate": 1.895495262634993e-05, "loss": 3.9536590576171875, "step": 6550 }, { "epoch": 0.052953189703187686, "grad_norm": 1.0214284658432007, "learning_rate": 1.8953337156611714e-05, "loss": 4.219508361816406, "step": 6560 }, { "epoch": 0.05303391102895474, "grad_norm": 0.7449914813041687, "learning_rate": 1.8951721686873502e-05, "loss": 3.82471923828125, "step": 6570 }, { "epoch": 0.053114632354721796, "grad_norm": 0.8907321095466614, "learning_rate": 1.895010621713529e-05, "loss": 4.271127700805664, "step": 6580 }, { "epoch": 0.05319535368048885, "grad_norm": 1.7897543907165527, "learning_rate": 1.8948490747397078e-05, "loss": 4.153648376464844, "step": 6590 }, { "epoch": 0.053276075006255905, "grad_norm": 1.396287202835083, "learning_rate": 1.8946875277658862e-05, "loss": 3.938407135009766, "step": 6600 }, { "epoch": 0.053356796332022956, "grad_norm": 1.202823281288147, "learning_rate": 1.894525980792065e-05, "loss": 3.831094741821289, "step": 6610 }, { "epoch": 0.053437517657790014, "grad_norm": 1.1393721103668213, "learning_rate": 1.8943644338182437e-05, "loss": 3.5728225708007812, "step": 6620 }, { "epoch": 0.053518238983557065, "grad_norm": 1.0966013669967651, "learning_rate": 1.8942028868444225e-05, "loss": 3.9280925750732423, "step": 6630 }, { "epoch": 0.05359896030932412, "grad_norm": 0.6988476514816284, "learning_rate": 1.894041339870601e-05, "loss": 3.5471267700195312, "step": 6640 }, { "epoch": 0.053679681635091174, "grad_norm": 1.0963596105575562, "learning_rate": 1.8938797928967797e-05, "loss": 4.081988906860351, "step": 6650 }, { "epoch": 0.05376040296085823, "grad_norm": 1.066907525062561, "learning_rate": 1.8937182459229585e-05, "loss": 3.375558090209961, "step": 6660 }, { "epoch": 0.05384112428662528, "grad_norm": 0.7533989548683167, "learning_rate": 1.8935566989491372e-05, "loss": 3.6470081329345705, "step": 6670 }, { "epoch": 0.05392184561239234, "grad_norm": 1.0150055885314941, "learning_rate": 1.8933951519753157e-05, "loss": 3.7965770721435548, "step": 6680 }, { "epoch": 0.05400256693815939, "grad_norm": 0.7590096592903137, "learning_rate": 1.8932336050014944e-05, "loss": 4.0905403137207035, "step": 6690 }, { "epoch": 0.05408328826392644, "grad_norm": 0.9117512106895447, "learning_rate": 1.8930720580276732e-05, "loss": 4.035525131225586, "step": 6700 }, { "epoch": 0.0541640095896935, "grad_norm": 0.9224339723587036, "learning_rate": 1.892910511053852e-05, "loss": 3.995868682861328, "step": 6710 }, { "epoch": 0.05424473091546055, "grad_norm": 2.2119193077087402, "learning_rate": 1.8927489640800304e-05, "loss": 4.867451477050781, "step": 6720 }, { "epoch": 0.05432545224122761, "grad_norm": 1.3798842430114746, "learning_rate": 1.892587417106209e-05, "loss": 3.9140769958496096, "step": 6730 }, { "epoch": 0.05440617356699466, "grad_norm": 1.0151206254959106, "learning_rate": 1.892425870132388e-05, "loss": 3.767953109741211, "step": 6740 }, { "epoch": 0.05448689489276172, "grad_norm": 1.1671338081359863, "learning_rate": 1.8922643231585667e-05, "loss": 3.9115047454833984, "step": 6750 }, { "epoch": 0.05456761621852877, "grad_norm": 1.0968564748764038, "learning_rate": 1.892102776184745e-05, "loss": 3.49474983215332, "step": 6760 }, { "epoch": 0.05464833754429583, "grad_norm": 0.9297124743461609, "learning_rate": 1.891941229210924e-05, "loss": 4.565765762329102, "step": 6770 }, { "epoch": 0.05472905887006288, "grad_norm": 0.7998102903366089, "learning_rate": 1.8917796822371027e-05, "loss": 3.9090930938720705, "step": 6780 }, { "epoch": 0.05480978019582994, "grad_norm": 0.9464839696884155, "learning_rate": 1.8916181352632814e-05, "loss": 3.958469009399414, "step": 6790 }, { "epoch": 0.05489050152159699, "grad_norm": 0.9717810750007629, "learning_rate": 1.89145658828946e-05, "loss": 4.293634414672852, "step": 6800 }, { "epoch": 0.05497122284736405, "grad_norm": 0.772857129573822, "learning_rate": 1.8912950413156386e-05, "loss": 4.1173149108886715, "step": 6810 }, { "epoch": 0.0550519441731311, "grad_norm": 1.0132750272750854, "learning_rate": 1.8911334943418174e-05, "loss": 3.7257511138916017, "step": 6820 }, { "epoch": 0.055132665498898156, "grad_norm": 2.000317335128784, "learning_rate": 1.8909719473679962e-05, "loss": 3.8671951293945312, "step": 6830 }, { "epoch": 0.05521338682466521, "grad_norm": 1.4961435794830322, "learning_rate": 1.8908104003941746e-05, "loss": 4.2016754150390625, "step": 6840 }, { "epoch": 0.055294108150432265, "grad_norm": 1.192291498184204, "learning_rate": 1.8906488534203534e-05, "loss": 4.02922477722168, "step": 6850 }, { "epoch": 0.055374829476199317, "grad_norm": 1.4452502727508545, "learning_rate": 1.890487306446532e-05, "loss": 3.5631771087646484, "step": 6860 }, { "epoch": 0.055455550801966375, "grad_norm": 1.4627517461776733, "learning_rate": 1.890325759472711e-05, "loss": 3.7002723693847654, "step": 6870 }, { "epoch": 0.055536272127733426, "grad_norm": 0.7407609820365906, "learning_rate": 1.8901642124988894e-05, "loss": 4.409762191772461, "step": 6880 }, { "epoch": 0.055616993453500484, "grad_norm": 0.9886924028396606, "learning_rate": 1.890002665525068e-05, "loss": 3.8481220245361327, "step": 6890 }, { "epoch": 0.055697714779267535, "grad_norm": 0.8832054734230042, "learning_rate": 1.889841118551247e-05, "loss": 4.226226806640625, "step": 6900 }, { "epoch": 0.055778436105034586, "grad_norm": 1.0620492696762085, "learning_rate": 1.8896795715774257e-05, "loss": 3.7094005584716796, "step": 6910 }, { "epoch": 0.055859157430801644, "grad_norm": 1.1153236627578735, "learning_rate": 1.889518024603604e-05, "loss": 3.61572151184082, "step": 6920 }, { "epoch": 0.055939878756568695, "grad_norm": 1.1316016912460327, "learning_rate": 1.889356477629783e-05, "loss": 3.928028869628906, "step": 6930 }, { "epoch": 0.05602060008233575, "grad_norm": 0.755100667476654, "learning_rate": 1.8891949306559616e-05, "loss": 3.3382633209228514, "step": 6940 }, { "epoch": 0.056101321408102804, "grad_norm": 0.9292479157447815, "learning_rate": 1.8890333836821404e-05, "loss": 3.6061100006103515, "step": 6950 }, { "epoch": 0.05618204273386986, "grad_norm": 1.1738840341567993, "learning_rate": 1.8888718367083188e-05, "loss": 4.2319183349609375, "step": 6960 }, { "epoch": 0.05626276405963691, "grad_norm": 1.2125003337860107, "learning_rate": 1.888710289734498e-05, "loss": 4.4724987030029295, "step": 6970 }, { "epoch": 0.05634348538540397, "grad_norm": 7.508830547332764, "learning_rate": 1.8885487427606764e-05, "loss": 4.727987670898438, "step": 6980 }, { "epoch": 0.05642420671117102, "grad_norm": 1.1748058795928955, "learning_rate": 1.888387195786855e-05, "loss": 4.281808471679687, "step": 6990 }, { "epoch": 0.05650492803693808, "grad_norm": 3.3767178058624268, "learning_rate": 1.8882256488130336e-05, "loss": 4.062430953979492, "step": 7000 }, { "epoch": 0.05658564936270513, "grad_norm": 1.9918705224990845, "learning_rate": 1.8880641018392127e-05, "loss": 3.6458335876464845, "step": 7010 }, { "epoch": 0.05666637068847219, "grad_norm": 0.9864311814308167, "learning_rate": 1.887902554865391e-05, "loss": 4.041366195678711, "step": 7020 }, { "epoch": 0.05674709201423924, "grad_norm": 1.0799754858016968, "learning_rate": 1.88774100789157e-05, "loss": 4.390543365478516, "step": 7030 }, { "epoch": 0.0568278133400063, "grad_norm": 1.6307411193847656, "learning_rate": 1.8875794609177483e-05, "loss": 3.947567367553711, "step": 7040 }, { "epoch": 0.05690853466577335, "grad_norm": 0.6861035227775574, "learning_rate": 1.8874179139439274e-05, "loss": 4.619384002685547, "step": 7050 }, { "epoch": 0.05698925599154041, "grad_norm": 0.9284070134162903, "learning_rate": 1.887256366970106e-05, "loss": 3.8825363159179687, "step": 7060 }, { "epoch": 0.05706997731730746, "grad_norm": 1.0319855213165283, "learning_rate": 1.8870948199962846e-05, "loss": 3.7489830017089845, "step": 7070 }, { "epoch": 0.05715069864307452, "grad_norm": 1.7174781560897827, "learning_rate": 1.886933273022463e-05, "loss": 4.056433868408203, "step": 7080 }, { "epoch": 0.05723141996884157, "grad_norm": 1.0964654684066772, "learning_rate": 1.886771726048642e-05, "loss": 4.1409355163574215, "step": 7090 }, { "epoch": 0.05731214129460862, "grad_norm": 0.9838125705718994, "learning_rate": 1.8866101790748206e-05, "loss": 3.3314353942871096, "step": 7100 }, { "epoch": 0.05739286262037568, "grad_norm": 0.9878808259963989, "learning_rate": 1.8864486321009994e-05, "loss": 3.7864154815673827, "step": 7110 }, { "epoch": 0.05747358394614273, "grad_norm": 1.2036609649658203, "learning_rate": 1.886287085127178e-05, "loss": 3.522562026977539, "step": 7120 }, { "epoch": 0.057554305271909786, "grad_norm": 0.9661350250244141, "learning_rate": 1.886125538153357e-05, "loss": 3.9641780853271484, "step": 7130 }, { "epoch": 0.05763502659767684, "grad_norm": 1.1021405458450317, "learning_rate": 1.8859639911795353e-05, "loss": 3.9516983032226562, "step": 7140 }, { "epoch": 0.057715747923443896, "grad_norm": 1.5727473497390747, "learning_rate": 1.885802444205714e-05, "loss": 4.184222030639648, "step": 7150 }, { "epoch": 0.05779646924921095, "grad_norm": 0.9744871854782104, "learning_rate": 1.885640897231893e-05, "loss": 3.5319393157958983, "step": 7160 }, { "epoch": 0.057877190574978005, "grad_norm": 1.2051987648010254, "learning_rate": 1.8854793502580716e-05, "loss": 3.7270668029785154, "step": 7170 }, { "epoch": 0.057957911900745056, "grad_norm": 1.272252082824707, "learning_rate": 1.88531780328425e-05, "loss": 3.800779342651367, "step": 7180 }, { "epoch": 0.058038633226512114, "grad_norm": 0.8935471177101135, "learning_rate": 1.8851562563104288e-05, "loss": 3.5646167755126954, "step": 7190 }, { "epoch": 0.058119354552279165, "grad_norm": 0.7880212068557739, "learning_rate": 1.8849947093366076e-05, "loss": 3.6482410430908203, "step": 7200 }, { "epoch": 0.05820007587804622, "grad_norm": 0.978563666343689, "learning_rate": 1.8848331623627864e-05, "loss": 3.608643341064453, "step": 7210 }, { "epoch": 0.058280797203813274, "grad_norm": 1.6847397089004517, "learning_rate": 1.8846716153889648e-05, "loss": 3.631250762939453, "step": 7220 }, { "epoch": 0.05836151852958033, "grad_norm": 2.3456008434295654, "learning_rate": 1.8845100684151436e-05, "loss": 4.980622863769531, "step": 7230 }, { "epoch": 0.05844223985534738, "grad_norm": 0.7246406078338623, "learning_rate": 1.8843485214413223e-05, "loss": 4.225455856323242, "step": 7240 }, { "epoch": 0.05852296118111444, "grad_norm": 2.245452642440796, "learning_rate": 1.884186974467501e-05, "loss": 3.366838073730469, "step": 7250 }, { "epoch": 0.05860368250688149, "grad_norm": 1.5550767183303833, "learning_rate": 1.8840254274936795e-05, "loss": 4.0767169952392575, "step": 7260 }, { "epoch": 0.05868440383264855, "grad_norm": 0.8920758366584778, "learning_rate": 1.8838638805198583e-05, "loss": 4.184835052490234, "step": 7270 }, { "epoch": 0.0587651251584156, "grad_norm": 1.0338104963302612, "learning_rate": 1.883702333546037e-05, "loss": 3.9564445495605467, "step": 7280 }, { "epoch": 0.05884584648418266, "grad_norm": 0.9381645321846008, "learning_rate": 1.883540786572216e-05, "loss": 3.7753952026367186, "step": 7290 }, { "epoch": 0.05892656780994971, "grad_norm": 0.6962329745292664, "learning_rate": 1.8833792395983943e-05, "loss": 4.029602813720703, "step": 7300 }, { "epoch": 0.05900728913571676, "grad_norm": 0.7968412041664124, "learning_rate": 1.883217692624573e-05, "loss": 3.913155746459961, "step": 7310 }, { "epoch": 0.05908801046148382, "grad_norm": 0.8920120596885681, "learning_rate": 1.8830561456507518e-05, "loss": 3.718666839599609, "step": 7320 }, { "epoch": 0.05916873178725087, "grad_norm": 1.0305486917495728, "learning_rate": 1.8828945986769306e-05, "loss": 4.4831291198730465, "step": 7330 }, { "epoch": 0.05924945311301793, "grad_norm": 1.5760372877120972, "learning_rate": 1.882733051703109e-05, "loss": 4.116658020019531, "step": 7340 }, { "epoch": 0.05933017443878498, "grad_norm": 1.0225812196731567, "learning_rate": 1.8825715047292878e-05, "loss": 4.276322937011718, "step": 7350 }, { "epoch": 0.05941089576455204, "grad_norm": 0.8552616834640503, "learning_rate": 1.8824099577554666e-05, "loss": 3.850067138671875, "step": 7360 }, { "epoch": 0.05949161709031909, "grad_norm": 0.9506632685661316, "learning_rate": 1.8822484107816453e-05, "loss": 3.6924392700195314, "step": 7370 }, { "epoch": 0.05957233841608615, "grad_norm": 1.0546354055404663, "learning_rate": 1.8820868638078238e-05, "loss": 3.7694473266601562, "step": 7380 }, { "epoch": 0.0596530597418532, "grad_norm": 1.2492289543151855, "learning_rate": 1.8819253168340025e-05, "loss": 3.7833683013916017, "step": 7390 }, { "epoch": 0.059733781067620256, "grad_norm": 1.212972640991211, "learning_rate": 1.8817637698601813e-05, "loss": 3.5537303924560546, "step": 7400 }, { "epoch": 0.05981450239338731, "grad_norm": 0.8844053745269775, "learning_rate": 1.88160222288636e-05, "loss": 3.7142253875732423, "step": 7410 }, { "epoch": 0.059895223719154365, "grad_norm": 1.0233135223388672, "learning_rate": 1.8814406759125385e-05, "loss": 3.9622650146484375, "step": 7420 }, { "epoch": 0.05997594504492142, "grad_norm": 1.3177647590637207, "learning_rate": 1.8812791289387173e-05, "loss": 3.631791687011719, "step": 7430 }, { "epoch": 0.060056666370688475, "grad_norm": 0.9148555994033813, "learning_rate": 1.881117581964896e-05, "loss": 3.821018600463867, "step": 7440 }, { "epoch": 0.060137387696455526, "grad_norm": 1.319475769996643, "learning_rate": 1.8809560349910748e-05, "loss": 3.7231658935546874, "step": 7450 }, { "epoch": 0.060218109022222584, "grad_norm": 1.0065284967422485, "learning_rate": 1.8807944880172532e-05, "loss": 3.6976829528808595, "step": 7460 }, { "epoch": 0.060298830347989635, "grad_norm": 1.101468801498413, "learning_rate": 1.880632941043432e-05, "loss": 3.8951770782470705, "step": 7470 }, { "epoch": 0.06037955167375669, "grad_norm": 0.9023465514183044, "learning_rate": 1.8804713940696108e-05, "loss": 3.6798782348632812, "step": 7480 }, { "epoch": 0.060460272999523744, "grad_norm": 1.1093106269836426, "learning_rate": 1.8803098470957895e-05, "loss": 4.090404510498047, "step": 7490 }, { "epoch": 0.0605409943252908, "grad_norm": 0.9112844467163086, "learning_rate": 1.880148300121968e-05, "loss": 3.945155715942383, "step": 7500 }, { "epoch": 0.06062171565105785, "grad_norm": 0.7097480893135071, "learning_rate": 1.8799867531481467e-05, "loss": 3.543164825439453, "step": 7510 }, { "epoch": 0.060702436976824904, "grad_norm": 1.3261356353759766, "learning_rate": 1.8798252061743255e-05, "loss": 3.429964065551758, "step": 7520 }, { "epoch": 0.06078315830259196, "grad_norm": 0.7471715807914734, "learning_rate": 1.8796636592005043e-05, "loss": 3.5903697967529298, "step": 7530 }, { "epoch": 0.06086387962835901, "grad_norm": 0.5903865098953247, "learning_rate": 1.8795021122266827e-05, "loss": 3.6616146087646486, "step": 7540 }, { "epoch": 0.06094460095412607, "grad_norm": 1.3282064199447632, "learning_rate": 1.8793405652528615e-05, "loss": 3.7516571044921876, "step": 7550 }, { "epoch": 0.06102532227989312, "grad_norm": 0.9029403328895569, "learning_rate": 1.8791790182790402e-05, "loss": 3.8838115692138673, "step": 7560 }, { "epoch": 0.06110604360566018, "grad_norm": 1.1758476495742798, "learning_rate": 1.879017471305219e-05, "loss": 4.025917434692383, "step": 7570 }, { "epoch": 0.06118676493142723, "grad_norm": 1.4019696712493896, "learning_rate": 1.8788559243313974e-05, "loss": 3.935799789428711, "step": 7580 }, { "epoch": 0.06126748625719429, "grad_norm": 1.9167431592941284, "learning_rate": 1.8786943773575762e-05, "loss": 3.747219467163086, "step": 7590 }, { "epoch": 0.06134820758296134, "grad_norm": 0.8766708374023438, "learning_rate": 1.878532830383755e-05, "loss": 3.574024200439453, "step": 7600 }, { "epoch": 0.0614289289087284, "grad_norm": 2.103281021118164, "learning_rate": 1.8783712834099337e-05, "loss": 3.5294761657714844, "step": 7610 }, { "epoch": 0.06150965023449545, "grad_norm": 0.9076696038246155, "learning_rate": 1.8782097364361122e-05, "loss": 3.954905700683594, "step": 7620 }, { "epoch": 0.06159037156026251, "grad_norm": 0.9910807013511658, "learning_rate": 1.878048189462291e-05, "loss": 3.9470115661621095, "step": 7630 }, { "epoch": 0.06167109288602956, "grad_norm": 1.0947929620742798, "learning_rate": 1.8778866424884697e-05, "loss": 4.0026599884033205, "step": 7640 }, { "epoch": 0.06175181421179662, "grad_norm": 1.4458699226379395, "learning_rate": 1.8777250955146485e-05, "loss": 3.7655872344970702, "step": 7650 }, { "epoch": 0.06183253553756367, "grad_norm": 0.9240409135818481, "learning_rate": 1.877563548540827e-05, "loss": 3.6917037963867188, "step": 7660 }, { "epoch": 0.061913256863330726, "grad_norm": 0.6859888434410095, "learning_rate": 1.8774020015670057e-05, "loss": 3.975014877319336, "step": 7670 }, { "epoch": 0.06199397818909778, "grad_norm": 0.5802547335624695, "learning_rate": 1.8772404545931845e-05, "loss": 3.6943603515625, "step": 7680 }, { "epoch": 0.062074699514864835, "grad_norm": 1.57650887966156, "learning_rate": 1.8770789076193632e-05, "loss": 3.9964332580566406, "step": 7690 }, { "epoch": 0.062155420840631886, "grad_norm": 1.319881796836853, "learning_rate": 1.8769173606455417e-05, "loss": 3.791651153564453, "step": 7700 }, { "epoch": 0.06223614216639894, "grad_norm": 1.0997490882873535, "learning_rate": 1.8767558136717204e-05, "loss": 3.9730228424072265, "step": 7710 }, { "epoch": 0.062316863492165996, "grad_norm": 1.3002580404281616, "learning_rate": 1.8765942666978992e-05, "loss": 3.877471160888672, "step": 7720 }, { "epoch": 0.06239758481793305, "grad_norm": 0.7795037627220154, "learning_rate": 1.876432719724078e-05, "loss": 3.695795440673828, "step": 7730 }, { "epoch": 0.062478306143700105, "grad_norm": 1.221368670463562, "learning_rate": 1.8762711727502567e-05, "loss": 3.974779510498047, "step": 7740 }, { "epoch": 0.06255902746946716, "grad_norm": 0.7731301784515381, "learning_rate": 1.876109625776435e-05, "loss": 3.7455345153808595, "step": 7750 }, { "epoch": 0.0626397487952342, "grad_norm": 0.7296656966209412, "learning_rate": 1.875948078802614e-05, "loss": 4.009473037719727, "step": 7760 }, { "epoch": 0.06272047012100126, "grad_norm": 1.2612383365631104, "learning_rate": 1.8757865318287927e-05, "loss": 3.6259796142578127, "step": 7770 }, { "epoch": 0.06280119144676832, "grad_norm": 1.6196932792663574, "learning_rate": 1.8756249848549715e-05, "loss": 4.348687362670899, "step": 7780 }, { "epoch": 0.06288191277253538, "grad_norm": 2.4883854389190674, "learning_rate": 1.87546343788115e-05, "loss": 3.585981750488281, "step": 7790 }, { "epoch": 0.06296263409830243, "grad_norm": 1.1459221839904785, "learning_rate": 1.8753018909073287e-05, "loss": 3.9150146484375, "step": 7800 }, { "epoch": 0.06304335542406948, "grad_norm": 1.2108365297317505, "learning_rate": 1.8751403439335074e-05, "loss": 4.056612396240235, "step": 7810 }, { "epoch": 0.06312407674983654, "grad_norm": 1.224731683731079, "learning_rate": 1.8749787969596862e-05, "loss": 3.616780090332031, "step": 7820 }, { "epoch": 0.0632047980756036, "grad_norm": 1.010125994682312, "learning_rate": 1.8748172499858646e-05, "loss": 4.0782428741455075, "step": 7830 }, { "epoch": 0.06328551940137064, "grad_norm": 0.7893972396850586, "learning_rate": 1.8746557030120437e-05, "loss": 3.762334442138672, "step": 7840 }, { "epoch": 0.0633662407271377, "grad_norm": 0.9737257361412048, "learning_rate": 1.8744941560382222e-05, "loss": 3.8805675506591797, "step": 7850 }, { "epoch": 0.06344696205290476, "grad_norm": 2.107862949371338, "learning_rate": 1.874332609064401e-05, "loss": 4.282862854003906, "step": 7860 }, { "epoch": 0.06352768337867182, "grad_norm": 1.201302409172058, "learning_rate": 1.8741710620905794e-05, "loss": 3.6042705535888673, "step": 7870 }, { "epoch": 0.06360840470443886, "grad_norm": 0.8261221051216125, "learning_rate": 1.8740095151167585e-05, "loss": 3.7276931762695313, "step": 7880 }, { "epoch": 0.06368912603020592, "grad_norm": 1.2431037425994873, "learning_rate": 1.873847968142937e-05, "loss": 3.8472930908203127, "step": 7890 }, { "epoch": 0.06376984735597298, "grad_norm": 0.9229593873023987, "learning_rate": 1.8736864211691157e-05, "loss": 3.981663131713867, "step": 7900 }, { "epoch": 0.06385056868174002, "grad_norm": 1.5386403799057007, "learning_rate": 1.873524874195294e-05, "loss": 4.352790451049804, "step": 7910 }, { "epoch": 0.06393129000750708, "grad_norm": 0.9543377757072449, "learning_rate": 1.8733633272214732e-05, "loss": 3.382924270629883, "step": 7920 }, { "epoch": 0.06401201133327414, "grad_norm": 1.8621643781661987, "learning_rate": 1.8732017802476517e-05, "loss": 4.362221145629883, "step": 7930 }, { "epoch": 0.0640927326590412, "grad_norm": 1.5047447681427002, "learning_rate": 1.8730402332738304e-05, "loss": 3.8437557220458984, "step": 7940 }, { "epoch": 0.06417345398480824, "grad_norm": 0.9802398085594177, "learning_rate": 1.872878686300009e-05, "loss": 3.758544158935547, "step": 7950 }, { "epoch": 0.0642541753105753, "grad_norm": 1.026803731918335, "learning_rate": 1.872717139326188e-05, "loss": 3.20477180480957, "step": 7960 }, { "epoch": 0.06433489663634236, "grad_norm": 1.0082319974899292, "learning_rate": 1.8725555923523664e-05, "loss": 4.307893753051758, "step": 7970 }, { "epoch": 0.06441561796210941, "grad_norm": 1.0413990020751953, "learning_rate": 1.872394045378545e-05, "loss": 3.586578369140625, "step": 7980 }, { "epoch": 0.06449633928787646, "grad_norm": 1.2867546081542969, "learning_rate": 1.872232498404724e-05, "loss": 4.338106155395508, "step": 7990 }, { "epoch": 0.06457706061364352, "grad_norm": 1.979444146156311, "learning_rate": 1.8720709514309027e-05, "loss": 3.564011001586914, "step": 8000 }, { "epoch": 0.06465778193941057, "grad_norm": 1.0140947103500366, "learning_rate": 1.871909404457081e-05, "loss": 3.6363956451416017, "step": 8010 }, { "epoch": 0.06473850326517763, "grad_norm": 0.9731422066688538, "learning_rate": 1.87174785748326e-05, "loss": 3.469438171386719, "step": 8020 }, { "epoch": 0.06481922459094468, "grad_norm": 1.4616483449935913, "learning_rate": 1.8715863105094387e-05, "loss": 4.212585830688477, "step": 8030 }, { "epoch": 0.06489994591671173, "grad_norm": 0.7450642585754395, "learning_rate": 1.8714247635356174e-05, "loss": 3.502842330932617, "step": 8040 }, { "epoch": 0.06498066724247879, "grad_norm": 2.201688528060913, "learning_rate": 1.871263216561796e-05, "loss": 3.795195388793945, "step": 8050 }, { "epoch": 0.06506138856824585, "grad_norm": 1.1111266613006592, "learning_rate": 1.8711016695879746e-05, "loss": 3.6617908477783203, "step": 8060 }, { "epoch": 0.0651421098940129, "grad_norm": 0.9635035991668701, "learning_rate": 1.8709401226141534e-05, "loss": 3.7052375793457033, "step": 8070 }, { "epoch": 0.06522283121977995, "grad_norm": 1.6220320463180542, "learning_rate": 1.8707785756403322e-05, "loss": 4.078377151489258, "step": 8080 }, { "epoch": 0.06530355254554701, "grad_norm": 1.0900541543960571, "learning_rate": 1.8706170286665106e-05, "loss": 3.418636703491211, "step": 8090 }, { "epoch": 0.06538427387131407, "grad_norm": 1.5786035060882568, "learning_rate": 1.8704554816926894e-05, "loss": 3.666904067993164, "step": 8100 }, { "epoch": 0.06546499519708111, "grad_norm": 1.0549734830856323, "learning_rate": 1.870293934718868e-05, "loss": 4.0225830078125, "step": 8110 }, { "epoch": 0.06554571652284817, "grad_norm": 1.2465068101882935, "learning_rate": 1.870132387745047e-05, "loss": 4.435401916503906, "step": 8120 }, { "epoch": 0.06562643784861523, "grad_norm": 2.5792460441589355, "learning_rate": 1.8699708407712253e-05, "loss": 3.4972537994384765, "step": 8130 }, { "epoch": 0.06570715917438227, "grad_norm": 1.1794929504394531, "learning_rate": 1.869809293797404e-05, "loss": 3.495095062255859, "step": 8140 }, { "epoch": 0.06578788050014933, "grad_norm": 1.2042073011398315, "learning_rate": 1.869647746823583e-05, "loss": 3.4075794219970703, "step": 8150 }, { "epoch": 0.06586860182591639, "grad_norm": 0.9192395210266113, "learning_rate": 1.8694861998497617e-05, "loss": 3.654281997680664, "step": 8160 }, { "epoch": 0.06594932315168345, "grad_norm": 1.2702910900115967, "learning_rate": 1.86932465287594e-05, "loss": 3.8203086853027344, "step": 8170 }, { "epoch": 0.06603004447745049, "grad_norm": 0.9540806412696838, "learning_rate": 1.869163105902119e-05, "loss": 3.993061828613281, "step": 8180 }, { "epoch": 0.06611076580321755, "grad_norm": 0.8911136388778687, "learning_rate": 1.8690015589282976e-05, "loss": 3.3312057495117187, "step": 8190 }, { "epoch": 0.06619148712898461, "grad_norm": 1.3162822723388672, "learning_rate": 1.8688400119544764e-05, "loss": 3.6358394622802734, "step": 8200 }, { "epoch": 0.06627220845475167, "grad_norm": 0.908817708492279, "learning_rate": 1.8686784649806548e-05, "loss": 3.8226341247558593, "step": 8210 }, { "epoch": 0.06635292978051871, "grad_norm": 1.0894070863723755, "learning_rate": 1.8685169180068336e-05, "loss": 3.2291633605957033, "step": 8220 }, { "epoch": 0.06643365110628577, "grad_norm": 1.3755110502243042, "learning_rate": 1.8683553710330124e-05, "loss": 3.6645790100097657, "step": 8230 }, { "epoch": 0.06651437243205283, "grad_norm": 0.7968117594718933, "learning_rate": 1.868193824059191e-05, "loss": 3.5392292022705076, "step": 8240 }, { "epoch": 0.06659509375781988, "grad_norm": 0.8809890747070312, "learning_rate": 1.8680322770853696e-05, "loss": 3.287556838989258, "step": 8250 }, { "epoch": 0.06667581508358693, "grad_norm": 1.1581189632415771, "learning_rate": 1.8678707301115483e-05, "loss": 3.8519859313964844, "step": 8260 }, { "epoch": 0.06675653640935399, "grad_norm": 1.0146205425262451, "learning_rate": 1.867709183137727e-05, "loss": 3.7350078582763673, "step": 8270 }, { "epoch": 0.06683725773512104, "grad_norm": 1.1235783100128174, "learning_rate": 1.867547636163906e-05, "loss": 4.128203582763672, "step": 8280 }, { "epoch": 0.0669179790608881, "grad_norm": 0.8283173441886902, "learning_rate": 1.8673860891900843e-05, "loss": 3.9002311706542967, "step": 8290 }, { "epoch": 0.06699870038665515, "grad_norm": 0.9379335045814514, "learning_rate": 1.867224542216263e-05, "loss": 3.772323226928711, "step": 8300 }, { "epoch": 0.0670794217124222, "grad_norm": 1.140924334526062, "learning_rate": 1.867062995242442e-05, "loss": 3.8177539825439455, "step": 8310 }, { "epoch": 0.06716014303818926, "grad_norm": 0.9456784129142761, "learning_rate": 1.8669014482686206e-05, "loss": 4.4038043975830075, "step": 8320 }, { "epoch": 0.06724086436395631, "grad_norm": 1.2587040662765503, "learning_rate": 1.866739901294799e-05, "loss": 3.543212127685547, "step": 8330 }, { "epoch": 0.06732158568972336, "grad_norm": 1.0309410095214844, "learning_rate": 1.8665783543209778e-05, "loss": 3.8904701232910157, "step": 8340 }, { "epoch": 0.06740230701549042, "grad_norm": 1.0400969982147217, "learning_rate": 1.8664168073471566e-05, "loss": 3.568597412109375, "step": 8350 }, { "epoch": 0.06748302834125748, "grad_norm": 0.835806667804718, "learning_rate": 1.8662552603733353e-05, "loss": 4.11774787902832, "step": 8360 }, { "epoch": 0.06756374966702453, "grad_norm": 0.6581095457077026, "learning_rate": 1.8660937133995138e-05, "loss": 3.470803451538086, "step": 8370 }, { "epoch": 0.06764447099279158, "grad_norm": 1.4669532775878906, "learning_rate": 1.8659321664256925e-05, "loss": 3.584768295288086, "step": 8380 }, { "epoch": 0.06772519231855864, "grad_norm": 1.31013023853302, "learning_rate": 1.8657706194518713e-05, "loss": 3.5070335388183596, "step": 8390 }, { "epoch": 0.0678059136443257, "grad_norm": 0.6371193528175354, "learning_rate": 1.86560907247805e-05, "loss": 3.4014877319335937, "step": 8400 }, { "epoch": 0.06788663497009274, "grad_norm": 1.4741768836975098, "learning_rate": 1.8654475255042285e-05, "loss": 3.862040328979492, "step": 8410 }, { "epoch": 0.0679673562958598, "grad_norm": 1.362408995628357, "learning_rate": 1.8652859785304073e-05, "loss": 4.048194885253906, "step": 8420 }, { "epoch": 0.06804807762162686, "grad_norm": 1.1413980722427368, "learning_rate": 1.865124431556586e-05, "loss": 4.089280700683593, "step": 8430 }, { "epoch": 0.06812879894739392, "grad_norm": 0.8617345690727234, "learning_rate": 1.8649628845827648e-05, "loss": 3.4519222259521483, "step": 8440 }, { "epoch": 0.06820952027316096, "grad_norm": 0.7301732301712036, "learning_rate": 1.8648013376089433e-05, "loss": 3.520016098022461, "step": 8450 }, { "epoch": 0.06829024159892802, "grad_norm": 1.9750739336013794, "learning_rate": 1.864639790635122e-05, "loss": 3.3560195922851563, "step": 8460 }, { "epoch": 0.06837096292469508, "grad_norm": 0.8116954565048218, "learning_rate": 1.8644782436613008e-05, "loss": 3.6290637969970705, "step": 8470 }, { "epoch": 0.06845168425046214, "grad_norm": 1.2730004787445068, "learning_rate": 1.8643166966874796e-05, "loss": 4.292410278320313, "step": 8480 }, { "epoch": 0.06853240557622918, "grad_norm": 0.6274962425231934, "learning_rate": 1.864155149713658e-05, "loss": 3.3339111328125, "step": 8490 }, { "epoch": 0.06861312690199624, "grad_norm": 0.961417555809021, "learning_rate": 1.8639936027398368e-05, "loss": 3.5982433319091798, "step": 8500 }, { "epoch": 0.0686938482277633, "grad_norm": 0.7362542748451233, "learning_rate": 1.8638320557660155e-05, "loss": 3.7813579559326174, "step": 8510 }, { "epoch": 0.06877456955353034, "grad_norm": 0.7109969258308411, "learning_rate": 1.8636705087921943e-05, "loss": 3.5379764556884767, "step": 8520 }, { "epoch": 0.0688552908792974, "grad_norm": 1.0523686408996582, "learning_rate": 1.8635089618183727e-05, "loss": 3.447440338134766, "step": 8530 }, { "epoch": 0.06893601220506446, "grad_norm": 1.286592960357666, "learning_rate": 1.8633474148445515e-05, "loss": 3.4917324066162108, "step": 8540 }, { "epoch": 0.06901673353083151, "grad_norm": 1.02684485912323, "learning_rate": 1.8631858678707303e-05, "loss": 3.3843757629394533, "step": 8550 }, { "epoch": 0.06909745485659856, "grad_norm": 0.848208487033844, "learning_rate": 1.863024320896909e-05, "loss": 4.455778121948242, "step": 8560 }, { "epoch": 0.06917817618236562, "grad_norm": 1.4410505294799805, "learning_rate": 1.8628627739230875e-05, "loss": 3.9962528228759764, "step": 8570 }, { "epoch": 0.06925889750813267, "grad_norm": 0.9638378620147705, "learning_rate": 1.8627012269492662e-05, "loss": 3.903091049194336, "step": 8580 }, { "epoch": 0.06933961883389973, "grad_norm": 0.6527012586593628, "learning_rate": 1.862539679975445e-05, "loss": 3.6469276428222654, "step": 8590 }, { "epoch": 0.06942034015966678, "grad_norm": 1.1326966285705566, "learning_rate": 1.8623781330016238e-05, "loss": 3.811357879638672, "step": 8600 }, { "epoch": 0.06950106148543383, "grad_norm": 0.9001250267028809, "learning_rate": 1.8622165860278022e-05, "loss": 3.8706180572509767, "step": 8610 }, { "epoch": 0.06958178281120089, "grad_norm": 1.3083235025405884, "learning_rate": 1.862055039053981e-05, "loss": 3.614872360229492, "step": 8620 }, { "epoch": 0.06966250413696795, "grad_norm": 0.9647362232208252, "learning_rate": 1.8618934920801597e-05, "loss": 3.5257308959960936, "step": 8630 }, { "epoch": 0.069743225462735, "grad_norm": 1.2527267932891846, "learning_rate": 1.8617319451063385e-05, "loss": 3.3719623565673826, "step": 8640 }, { "epoch": 0.06982394678850205, "grad_norm": 1.277700424194336, "learning_rate": 1.861570398132517e-05, "loss": 3.8191974639892576, "step": 8650 }, { "epoch": 0.06990466811426911, "grad_norm": 1.1281287670135498, "learning_rate": 1.8614088511586957e-05, "loss": 3.8327823638916017, "step": 8660 }, { "epoch": 0.06998538944003617, "grad_norm": 1.3166252374649048, "learning_rate": 1.8612473041848745e-05, "loss": 4.1453197479248045, "step": 8670 }, { "epoch": 0.07006611076580321, "grad_norm": 0.8570667505264282, "learning_rate": 1.8610857572110533e-05, "loss": 3.6750831604003906, "step": 8680 }, { "epoch": 0.07014683209157027, "grad_norm": 1.1909078359603882, "learning_rate": 1.8609242102372317e-05, "loss": 4.109222030639648, "step": 8690 }, { "epoch": 0.07022755341733733, "grad_norm": 1.3058481216430664, "learning_rate": 1.8607626632634105e-05, "loss": 4.265829849243164, "step": 8700 }, { "epoch": 0.07030827474310439, "grad_norm": 1.2703789472579956, "learning_rate": 1.8606011162895892e-05, "loss": 3.574449157714844, "step": 8710 }, { "epoch": 0.07038899606887143, "grad_norm": 1.5192734003067017, "learning_rate": 1.860439569315768e-05, "loss": 3.8475772857666017, "step": 8720 }, { "epoch": 0.07046971739463849, "grad_norm": 0.9653416275978088, "learning_rate": 1.8602780223419464e-05, "loss": 3.395328903198242, "step": 8730 }, { "epoch": 0.07055043872040555, "grad_norm": 1.9962897300720215, "learning_rate": 1.8601164753681252e-05, "loss": 3.7365394592285157, "step": 8740 }, { "epoch": 0.07063116004617259, "grad_norm": 0.6599236130714417, "learning_rate": 1.859954928394304e-05, "loss": 3.9050971984863283, "step": 8750 }, { "epoch": 0.07071188137193965, "grad_norm": 0.9578141570091248, "learning_rate": 1.8597933814204827e-05, "loss": 3.5134521484375, "step": 8760 }, { "epoch": 0.07079260269770671, "grad_norm": 1.0603200197219849, "learning_rate": 1.859631834446661e-05, "loss": 3.863721466064453, "step": 8770 }, { "epoch": 0.07087332402347377, "grad_norm": 0.7751734256744385, "learning_rate": 1.85947028747284e-05, "loss": 3.4574657440185548, "step": 8780 }, { "epoch": 0.07095404534924081, "grad_norm": 0.953290581703186, "learning_rate": 1.8593087404990187e-05, "loss": 3.9310791015625, "step": 8790 }, { "epoch": 0.07103476667500787, "grad_norm": 1.0111368894577026, "learning_rate": 1.8591471935251975e-05, "loss": 3.6620914459228517, "step": 8800 }, { "epoch": 0.07111548800077493, "grad_norm": 1.203175663948059, "learning_rate": 1.858985646551376e-05, "loss": 3.7816184997558593, "step": 8810 }, { "epoch": 0.07119620932654198, "grad_norm": 0.9576704502105713, "learning_rate": 1.8588240995775547e-05, "loss": 3.59921875, "step": 8820 }, { "epoch": 0.07127693065230903, "grad_norm": 1.2772639989852905, "learning_rate": 1.8586625526037334e-05, "loss": 3.660918426513672, "step": 8830 }, { "epoch": 0.07135765197807609, "grad_norm": 0.6787572503089905, "learning_rate": 1.8585010056299122e-05, "loss": 3.4932376861572267, "step": 8840 }, { "epoch": 0.07143837330384314, "grad_norm": 1.05942702293396, "learning_rate": 1.8583394586560906e-05, "loss": 3.5848129272460936, "step": 8850 }, { "epoch": 0.0715190946296102, "grad_norm": 0.8510199785232544, "learning_rate": 1.8581779116822697e-05, "loss": 3.845606231689453, "step": 8860 }, { "epoch": 0.07159981595537725, "grad_norm": 1.7924519777297974, "learning_rate": 1.8580163647084482e-05, "loss": 3.6612545013427735, "step": 8870 }, { "epoch": 0.0716805372811443, "grad_norm": 1.4368528127670288, "learning_rate": 1.857854817734627e-05, "loss": 3.833296203613281, "step": 8880 }, { "epoch": 0.07176125860691136, "grad_norm": 0.815722644329071, "learning_rate": 1.8576932707608054e-05, "loss": 3.9084625244140625, "step": 8890 }, { "epoch": 0.07184197993267842, "grad_norm": 1.0479127168655396, "learning_rate": 1.8575317237869845e-05, "loss": 3.8471145629882812, "step": 8900 }, { "epoch": 0.07192270125844547, "grad_norm": 1.1395639181137085, "learning_rate": 1.857370176813163e-05, "loss": 3.8676319122314453, "step": 8910 }, { "epoch": 0.07200342258421252, "grad_norm": 1.45448899269104, "learning_rate": 1.8572086298393417e-05, "loss": 3.9720951080322267, "step": 8920 }, { "epoch": 0.07208414390997958, "grad_norm": 0.6612263917922974, "learning_rate": 1.85704708286552e-05, "loss": 3.9298564910888674, "step": 8930 }, { "epoch": 0.07216486523574663, "grad_norm": 1.0548245906829834, "learning_rate": 1.8568855358916992e-05, "loss": 3.8828182220458984, "step": 8940 }, { "epoch": 0.07224558656151368, "grad_norm": 0.9673871994018555, "learning_rate": 1.8567239889178777e-05, "loss": 3.8369510650634764, "step": 8950 }, { "epoch": 0.07232630788728074, "grad_norm": 1.1175246238708496, "learning_rate": 1.8565624419440564e-05, "loss": 3.9193161010742186, "step": 8960 }, { "epoch": 0.0724070292130478, "grad_norm": 1.272528052330017, "learning_rate": 1.856400894970235e-05, "loss": 3.8482860565185546, "step": 8970 }, { "epoch": 0.07248775053881484, "grad_norm": 0.9791563749313354, "learning_rate": 1.856239347996414e-05, "loss": 4.094939041137695, "step": 8980 }, { "epoch": 0.0725684718645819, "grad_norm": 0.8818508982658386, "learning_rate": 1.8560778010225927e-05, "loss": 3.5408538818359374, "step": 8990 }, { "epoch": 0.07264919319034896, "grad_norm": 1.4039037227630615, "learning_rate": 1.855916254048771e-05, "loss": 3.670391845703125, "step": 9000 }, { "epoch": 0.07272991451611602, "grad_norm": 1.5594416856765747, "learning_rate": 1.85575470707495e-05, "loss": 4.019361114501953, "step": 9010 }, { "epoch": 0.07281063584188306, "grad_norm": 2.060734510421753, "learning_rate": 1.8555931601011287e-05, "loss": 3.900314712524414, "step": 9020 }, { "epoch": 0.07289135716765012, "grad_norm": 1.3620145320892334, "learning_rate": 1.8554316131273075e-05, "loss": 4.2317558288574215, "step": 9030 }, { "epoch": 0.07297207849341718, "grad_norm": 1.066686987876892, "learning_rate": 1.855270066153486e-05, "loss": 3.9213253021240235, "step": 9040 }, { "epoch": 0.07305279981918424, "grad_norm": 0.9062454700469971, "learning_rate": 1.8551085191796647e-05, "loss": 4.127095031738281, "step": 9050 }, { "epoch": 0.07313352114495128, "grad_norm": 0.8973999619483948, "learning_rate": 1.8549469722058434e-05, "loss": 3.7639923095703125, "step": 9060 }, { "epoch": 0.07321424247071834, "grad_norm": 0.7580782771110535, "learning_rate": 1.8547854252320222e-05, "loss": 3.3266422271728517, "step": 9070 }, { "epoch": 0.0732949637964854, "grad_norm": 1.088266134262085, "learning_rate": 1.8546238782582006e-05, "loss": 3.452353286743164, "step": 9080 }, { "epoch": 0.07337568512225245, "grad_norm": 1.0264519453048706, "learning_rate": 1.8544623312843794e-05, "loss": 3.831796646118164, "step": 9090 }, { "epoch": 0.0734564064480195, "grad_norm": 2.649029016494751, "learning_rate": 1.8543007843105582e-05, "loss": 4.387778854370117, "step": 9100 }, { "epoch": 0.07353712777378656, "grad_norm": 0.9430144429206848, "learning_rate": 1.854139237336737e-05, "loss": 3.776020812988281, "step": 9110 }, { "epoch": 0.07361784909955361, "grad_norm": 0.851908802986145, "learning_rate": 1.8539776903629154e-05, "loss": 3.584236907958984, "step": 9120 }, { "epoch": 0.07369857042532066, "grad_norm": 1.1903895139694214, "learning_rate": 1.853816143389094e-05, "loss": 4.04382095336914, "step": 9130 }, { "epoch": 0.07377929175108772, "grad_norm": 1.5274673700332642, "learning_rate": 1.853654596415273e-05, "loss": 3.7113380432128906, "step": 9140 }, { "epoch": 0.07386001307685477, "grad_norm": 1.087059497833252, "learning_rate": 1.8534930494414517e-05, "loss": 3.3820964813232424, "step": 9150 }, { "epoch": 0.07394073440262183, "grad_norm": 0.577434241771698, "learning_rate": 1.85333150246763e-05, "loss": 3.7562400817871096, "step": 9160 }, { "epoch": 0.07402145572838888, "grad_norm": 2.042106866836548, "learning_rate": 1.853169955493809e-05, "loss": 3.5373905181884764, "step": 9170 }, { "epoch": 0.07410217705415593, "grad_norm": 1.8404715061187744, "learning_rate": 1.8530084085199877e-05, "loss": 3.5739822387695312, "step": 9180 }, { "epoch": 0.07418289837992299, "grad_norm": 0.7908698320388794, "learning_rate": 1.8528468615461664e-05, "loss": 3.371979904174805, "step": 9190 }, { "epoch": 0.07426361970569005, "grad_norm": 1.0295829772949219, "learning_rate": 1.852685314572345e-05, "loss": 3.76468505859375, "step": 9200 }, { "epoch": 0.0743443410314571, "grad_norm": 1.1845026016235352, "learning_rate": 1.8525237675985236e-05, "loss": 3.524863052368164, "step": 9210 }, { "epoch": 0.07442506235722415, "grad_norm": 1.8489364385604858, "learning_rate": 1.8523622206247024e-05, "loss": 4.094180679321289, "step": 9220 }, { "epoch": 0.07450578368299121, "grad_norm": 0.6061121821403503, "learning_rate": 1.852200673650881e-05, "loss": 4.302643585205078, "step": 9230 }, { "epoch": 0.07458650500875827, "grad_norm": 1.117791771888733, "learning_rate": 1.8520391266770596e-05, "loss": 4.059133148193359, "step": 9240 }, { "epoch": 0.07466722633452531, "grad_norm": 1.179192066192627, "learning_rate": 1.8518775797032384e-05, "loss": 3.556580352783203, "step": 9250 }, { "epoch": 0.07474794766029237, "grad_norm": 0.9745780229568481, "learning_rate": 1.851716032729417e-05, "loss": 3.5792621612548827, "step": 9260 }, { "epoch": 0.07482866898605943, "grad_norm": 0.9233051538467407, "learning_rate": 1.851554485755596e-05, "loss": 3.9295257568359374, "step": 9270 }, { "epoch": 0.07490939031182649, "grad_norm": 1.2837998867034912, "learning_rate": 1.8513929387817743e-05, "loss": 3.2131362915039063, "step": 9280 }, { "epoch": 0.07499011163759353, "grad_norm": 1.1738271713256836, "learning_rate": 1.851231391807953e-05, "loss": 3.643476104736328, "step": 9290 }, { "epoch": 0.07507083296336059, "grad_norm": 0.8125507831573486, "learning_rate": 1.851069844834132e-05, "loss": 3.893036651611328, "step": 9300 }, { "epoch": 0.07515155428912765, "grad_norm": 0.6410837769508362, "learning_rate": 1.8509082978603106e-05, "loss": 4.1427452087402346, "step": 9310 }, { "epoch": 0.0752322756148947, "grad_norm": 0.8279767632484436, "learning_rate": 1.850746750886489e-05, "loss": 3.8380374908447266, "step": 9320 }, { "epoch": 0.07531299694066175, "grad_norm": 1.1182810068130493, "learning_rate": 1.850585203912668e-05, "loss": 3.7046344757080076, "step": 9330 }, { "epoch": 0.07539371826642881, "grad_norm": 1.1326367855072021, "learning_rate": 1.8504236569388466e-05, "loss": 3.802519607543945, "step": 9340 }, { "epoch": 0.07547443959219587, "grad_norm": 0.7302283048629761, "learning_rate": 1.8502621099650254e-05, "loss": 4.049286651611328, "step": 9350 }, { "epoch": 0.07555516091796291, "grad_norm": 0.9956520795822144, "learning_rate": 1.8501005629912038e-05, "loss": 4.417494583129883, "step": 9360 }, { "epoch": 0.07563588224372997, "grad_norm": 1.259714961051941, "learning_rate": 1.8499390160173826e-05, "loss": 3.7848087310791017, "step": 9370 }, { "epoch": 0.07571660356949703, "grad_norm": 1.1990845203399658, "learning_rate": 1.8497774690435613e-05, "loss": 3.465406036376953, "step": 9380 }, { "epoch": 0.07579732489526408, "grad_norm": 1.0456624031066895, "learning_rate": 1.84961592206974e-05, "loss": 3.973126983642578, "step": 9390 }, { "epoch": 0.07587804622103113, "grad_norm": 0.8129740357398987, "learning_rate": 1.8494543750959185e-05, "loss": 3.17236213684082, "step": 9400 }, { "epoch": 0.07595876754679819, "grad_norm": 1.1948432922363281, "learning_rate": 1.8492928281220973e-05, "loss": 3.521042251586914, "step": 9410 }, { "epoch": 0.07603948887256524, "grad_norm": 0.8507177233695984, "learning_rate": 1.849131281148276e-05, "loss": 3.752665710449219, "step": 9420 }, { "epoch": 0.0761202101983323, "grad_norm": 1.1946046352386475, "learning_rate": 1.848969734174455e-05, "loss": 3.380942535400391, "step": 9430 }, { "epoch": 0.07620093152409935, "grad_norm": 1.6992884874343872, "learning_rate": 1.8488081872006333e-05, "loss": 3.7443851470947265, "step": 9440 }, { "epoch": 0.0762816528498664, "grad_norm": 1.1750860214233398, "learning_rate": 1.848646640226812e-05, "loss": 4.362364959716797, "step": 9450 }, { "epoch": 0.07636237417563346, "grad_norm": 1.333754062652588, "learning_rate": 1.8484850932529908e-05, "loss": 3.3994556427001954, "step": 9460 }, { "epoch": 0.07644309550140052, "grad_norm": 1.9402875900268555, "learning_rate": 1.8483235462791696e-05, "loss": 3.7556735992431642, "step": 9470 }, { "epoch": 0.07652381682716757, "grad_norm": 1.6259788274765015, "learning_rate": 1.848161999305348e-05, "loss": 3.7217262268066404, "step": 9480 }, { "epoch": 0.07660453815293462, "grad_norm": 1.1259846687316895, "learning_rate": 1.8480004523315268e-05, "loss": 3.682315444946289, "step": 9490 }, { "epoch": 0.07668525947870168, "grad_norm": 1.497283935546875, "learning_rate": 1.8478389053577056e-05, "loss": 3.501177215576172, "step": 9500 }, { "epoch": 0.07676598080446874, "grad_norm": 1.2552878856658936, "learning_rate": 1.8476773583838843e-05, "loss": 3.6764205932617187, "step": 9510 }, { "epoch": 0.07684670213023578, "grad_norm": 1.6657344102859497, "learning_rate": 1.8475158114100628e-05, "loss": 3.471267318725586, "step": 9520 }, { "epoch": 0.07692742345600284, "grad_norm": 1.2270724773406982, "learning_rate": 1.8473542644362415e-05, "loss": 3.5133682250976563, "step": 9530 }, { "epoch": 0.0770081447817699, "grad_norm": 1.3852410316467285, "learning_rate": 1.8471927174624203e-05, "loss": 3.5472091674804687, "step": 9540 }, { "epoch": 0.07708886610753694, "grad_norm": 1.0219022035598755, "learning_rate": 1.847031170488599e-05, "loss": 3.2736957550048826, "step": 9550 }, { "epoch": 0.077169587433304, "grad_norm": 1.1888659000396729, "learning_rate": 1.8468696235147775e-05, "loss": 3.7910739898681642, "step": 9560 }, { "epoch": 0.07725030875907106, "grad_norm": 1.5554559230804443, "learning_rate": 1.8467080765409563e-05, "loss": 3.626268005371094, "step": 9570 }, { "epoch": 0.07733103008483812, "grad_norm": 1.9875415563583374, "learning_rate": 1.846546529567135e-05, "loss": 3.452239227294922, "step": 9580 }, { "epoch": 0.07741175141060516, "grad_norm": 1.1239949464797974, "learning_rate": 1.8463849825933138e-05, "loss": 3.778773880004883, "step": 9590 }, { "epoch": 0.07749247273637222, "grad_norm": 1.152139663696289, "learning_rate": 1.8462234356194922e-05, "loss": 3.485831451416016, "step": 9600 }, { "epoch": 0.07757319406213928, "grad_norm": 0.7208570241928101, "learning_rate": 1.846061888645671e-05, "loss": 3.889902114868164, "step": 9610 }, { "epoch": 0.07765391538790634, "grad_norm": 0.5867158770561218, "learning_rate": 1.8459003416718498e-05, "loss": 3.5342601776123046, "step": 9620 }, { "epoch": 0.07773463671367338, "grad_norm": 0.7622260451316833, "learning_rate": 1.8457387946980285e-05, "loss": 3.534877395629883, "step": 9630 }, { "epoch": 0.07781535803944044, "grad_norm": 1.3650459051132202, "learning_rate": 1.845577247724207e-05, "loss": 3.9032333374023436, "step": 9640 }, { "epoch": 0.0778960793652075, "grad_norm": 0.7756170034408569, "learning_rate": 1.8454157007503857e-05, "loss": 3.9293544769287108, "step": 9650 }, { "epoch": 0.07797680069097455, "grad_norm": 0.9546665549278259, "learning_rate": 1.8452541537765645e-05, "loss": 3.643467330932617, "step": 9660 }, { "epoch": 0.0780575220167416, "grad_norm": 0.8621135354042053, "learning_rate": 1.8450926068027433e-05, "loss": 3.8234794616699217, "step": 9670 }, { "epoch": 0.07813824334250866, "grad_norm": 1.7388861179351807, "learning_rate": 1.8449310598289217e-05, "loss": 4.142692947387696, "step": 9680 }, { "epoch": 0.07821896466827571, "grad_norm": 1.407363772392273, "learning_rate": 1.8447695128551005e-05, "loss": 3.335797119140625, "step": 9690 }, { "epoch": 0.07829968599404277, "grad_norm": 0.6430290341377258, "learning_rate": 1.8446079658812792e-05, "loss": 3.6048503875732423, "step": 9700 }, { "epoch": 0.07838040731980982, "grad_norm": 0.899316132068634, "learning_rate": 1.844446418907458e-05, "loss": 4.131148147583008, "step": 9710 }, { "epoch": 0.07846112864557687, "grad_norm": 1.2140589952468872, "learning_rate": 1.8442848719336364e-05, "loss": 4.2039844512939455, "step": 9720 }, { "epoch": 0.07854184997134393, "grad_norm": 1.2436939477920532, "learning_rate": 1.8441233249598156e-05, "loss": 3.5642623901367188, "step": 9730 }, { "epoch": 0.07862257129711098, "grad_norm": 1.0114237070083618, "learning_rate": 1.843961777985994e-05, "loss": 3.4475494384765626, "step": 9740 }, { "epoch": 0.07870329262287803, "grad_norm": 0.7749499678611755, "learning_rate": 1.8438002310121728e-05, "loss": 3.8493648529052735, "step": 9750 }, { "epoch": 0.07878401394864509, "grad_norm": 1.5623807907104492, "learning_rate": 1.8436386840383512e-05, "loss": 3.6267765045166014, "step": 9760 }, { "epoch": 0.07886473527441215, "grad_norm": 0.729620635509491, "learning_rate": 1.8434771370645303e-05, "loss": 3.960191345214844, "step": 9770 }, { "epoch": 0.0789454566001792, "grad_norm": 1.061992883682251, "learning_rate": 1.8433155900907087e-05, "loss": 3.882447052001953, "step": 9780 }, { "epoch": 0.07902617792594625, "grad_norm": 1.1715728044509888, "learning_rate": 1.8431540431168875e-05, "loss": 3.9523761749267576, "step": 9790 }, { "epoch": 0.07910689925171331, "grad_norm": 0.6699790954589844, "learning_rate": 1.842992496143066e-05, "loss": 3.481785202026367, "step": 9800 }, { "epoch": 0.07918762057748037, "grad_norm": 1.0522210597991943, "learning_rate": 1.842830949169245e-05, "loss": 3.4228099822998046, "step": 9810 }, { "epoch": 0.07926834190324741, "grad_norm": 1.2557883262634277, "learning_rate": 1.8426694021954235e-05, "loss": 3.4649063110351563, "step": 9820 }, { "epoch": 0.07934906322901447, "grad_norm": 1.0386079549789429, "learning_rate": 1.8425078552216022e-05, "loss": 3.6392383575439453, "step": 9830 }, { "epoch": 0.07942978455478153, "grad_norm": 1.5497863292694092, "learning_rate": 1.8423463082477807e-05, "loss": 3.7301769256591797, "step": 9840 }, { "epoch": 0.07951050588054859, "grad_norm": 1.5998127460479736, "learning_rate": 1.8421847612739598e-05, "loss": 4.244924163818359, "step": 9850 }, { "epoch": 0.07959122720631563, "grad_norm": 0.7848955392837524, "learning_rate": 1.8420232143001382e-05, "loss": 4.027299880981445, "step": 9860 }, { "epoch": 0.07967194853208269, "grad_norm": 1.1591688394546509, "learning_rate": 1.841861667326317e-05, "loss": 3.0662630081176756, "step": 9870 }, { "epoch": 0.07975266985784975, "grad_norm": 1.2187105417251587, "learning_rate": 1.8417001203524954e-05, "loss": 3.3401187896728515, "step": 9880 }, { "epoch": 0.0798333911836168, "grad_norm": 0.5813158750534058, "learning_rate": 1.8415385733786745e-05, "loss": 3.342354965209961, "step": 9890 }, { "epoch": 0.07991411250938385, "grad_norm": 1.0947976112365723, "learning_rate": 1.841377026404853e-05, "loss": 4.0206657409667965, "step": 9900 }, { "epoch": 0.07999483383515091, "grad_norm": 0.616270899772644, "learning_rate": 1.8412154794310317e-05, "loss": 3.6212226867675783, "step": 9910 }, { "epoch": 0.08007555516091797, "grad_norm": 1.6924402713775635, "learning_rate": 1.8410539324572105e-05, "loss": 4.162761306762695, "step": 9920 }, { "epoch": 0.08015627648668502, "grad_norm": 1.1192108392715454, "learning_rate": 1.8408923854833892e-05, "loss": 4.047366714477539, "step": 9930 }, { "epoch": 0.08023699781245207, "grad_norm": 1.7257460355758667, "learning_rate": 1.8407308385095677e-05, "loss": 3.7627944946289062, "step": 9940 }, { "epoch": 0.08031771913821913, "grad_norm": 0.8637246489524841, "learning_rate": 1.8405692915357464e-05, "loss": 3.6325340270996094, "step": 9950 }, { "epoch": 0.08039844046398618, "grad_norm": 0.9714899659156799, "learning_rate": 1.8404077445619252e-05, "loss": 3.316213607788086, "step": 9960 }, { "epoch": 0.08047916178975323, "grad_norm": 1.2141858339309692, "learning_rate": 1.840246197588104e-05, "loss": 3.7018714904785157, "step": 9970 }, { "epoch": 0.08055988311552029, "grad_norm": 0.7250025868415833, "learning_rate": 1.8400846506142824e-05, "loss": 4.046578216552734, "step": 9980 }, { "epoch": 0.08064060444128734, "grad_norm": 0.8112062811851501, "learning_rate": 1.8399231036404612e-05, "loss": 3.3478923797607423, "step": 9990 }, { "epoch": 0.0807213257670544, "grad_norm": 1.9580439329147339, "learning_rate": 1.83976155666664e-05, "loss": 3.482676696777344, "step": 10000 }, { "epoch": 0.08080204709282145, "grad_norm": 1.19406259059906, "learning_rate": 1.8396000096928187e-05, "loss": 3.857670211791992, "step": 10010 }, { "epoch": 0.0808827684185885, "grad_norm": 1.0860891342163086, "learning_rate": 1.839438462718997e-05, "loss": 3.4326961517333983, "step": 10020 }, { "epoch": 0.08096348974435556, "grad_norm": 1.0957341194152832, "learning_rate": 1.839276915745176e-05, "loss": 3.312342071533203, "step": 10030 }, { "epoch": 0.08104421107012262, "grad_norm": 0.6940878033638, "learning_rate": 1.8391153687713547e-05, "loss": 3.7262947082519533, "step": 10040 }, { "epoch": 0.08112493239588967, "grad_norm": 1.4676566123962402, "learning_rate": 1.8389538217975335e-05, "loss": 3.724465560913086, "step": 10050 }, { "epoch": 0.08120565372165672, "grad_norm": 0.9690456390380859, "learning_rate": 1.838792274823712e-05, "loss": 3.4696586608886717, "step": 10060 }, { "epoch": 0.08128637504742378, "grad_norm": 0.983734667301178, "learning_rate": 1.8386307278498907e-05, "loss": 3.771417236328125, "step": 10070 }, { "epoch": 0.08136709637319084, "grad_norm": 1.2890808582305908, "learning_rate": 1.8384691808760694e-05, "loss": 4.007259368896484, "step": 10080 }, { "epoch": 0.08144781769895788, "grad_norm": 1.2523424625396729, "learning_rate": 1.8383076339022482e-05, "loss": 3.3662059783935545, "step": 10090 }, { "epoch": 0.08152853902472494, "grad_norm": 0.7900214195251465, "learning_rate": 1.8381460869284266e-05, "loss": 3.3791332244873047, "step": 10100 }, { "epoch": 0.081609260350492, "grad_norm": 0.7533133029937744, "learning_rate": 1.8379845399546054e-05, "loss": 3.5220882415771486, "step": 10110 }, { "epoch": 0.08168998167625906, "grad_norm": 1.4891935586929321, "learning_rate": 1.837822992980784e-05, "loss": 3.702719879150391, "step": 10120 }, { "epoch": 0.0817707030020261, "grad_norm": 0.6802521347999573, "learning_rate": 1.837661446006963e-05, "loss": 4.02373046875, "step": 10130 }, { "epoch": 0.08185142432779316, "grad_norm": 0.8967511653900146, "learning_rate": 1.8374998990331414e-05, "loss": 3.499441146850586, "step": 10140 }, { "epoch": 0.08193214565356022, "grad_norm": 0.8906269073486328, "learning_rate": 1.83733835205932e-05, "loss": 3.7430801391601562, "step": 10150 }, { "epoch": 0.08201286697932726, "grad_norm": 1.1339713335037231, "learning_rate": 1.837176805085499e-05, "loss": 3.6017826080322264, "step": 10160 }, { "epoch": 0.08209358830509432, "grad_norm": 1.1824935674667358, "learning_rate": 1.8370152581116777e-05, "loss": 3.737778091430664, "step": 10170 }, { "epoch": 0.08217430963086138, "grad_norm": 0.8865266442298889, "learning_rate": 1.836853711137856e-05, "loss": 3.5191017150878907, "step": 10180 }, { "epoch": 0.08225503095662844, "grad_norm": 1.2587045431137085, "learning_rate": 1.836692164164035e-05, "loss": 3.8005603790283202, "step": 10190 }, { "epoch": 0.08233575228239548, "grad_norm": 1.3523495197296143, "learning_rate": 1.8365306171902136e-05, "loss": 4.610843658447266, "step": 10200 }, { "epoch": 0.08241647360816254, "grad_norm": 0.9792190194129944, "learning_rate": 1.8363690702163924e-05, "loss": 3.5131103515625, "step": 10210 }, { "epoch": 0.0824971949339296, "grad_norm": 1.2969801425933838, "learning_rate": 1.836207523242571e-05, "loss": 3.4137107849121096, "step": 10220 }, { "epoch": 0.08257791625969665, "grad_norm": 2.239304542541504, "learning_rate": 1.8360459762687496e-05, "loss": 4.056715393066407, "step": 10230 }, { "epoch": 0.0826586375854637, "grad_norm": 1.3112428188323975, "learning_rate": 1.8358844292949284e-05, "loss": 4.4735771179199215, "step": 10240 }, { "epoch": 0.08273935891123076, "grad_norm": 1.346226453781128, "learning_rate": 1.835722882321107e-05, "loss": 3.816058349609375, "step": 10250 }, { "epoch": 0.08282008023699781, "grad_norm": 1.3648229837417603, "learning_rate": 1.835561335347286e-05, "loss": 4.008932495117188, "step": 10260 }, { "epoch": 0.08290080156276487, "grad_norm": 0.8984281420707703, "learning_rate": 1.8353997883734644e-05, "loss": 4.187336349487305, "step": 10270 }, { "epoch": 0.08298152288853192, "grad_norm": 0.9131896495819092, "learning_rate": 1.835238241399643e-05, "loss": 3.7086860656738283, "step": 10280 }, { "epoch": 0.08306224421429897, "grad_norm": 0.8178196549415588, "learning_rate": 1.835076694425822e-05, "loss": 3.871455764770508, "step": 10290 }, { "epoch": 0.08314296554006603, "grad_norm": 0.7432368993759155, "learning_rate": 1.8349151474520007e-05, "loss": 3.5735889434814454, "step": 10300 }, { "epoch": 0.08322368686583309, "grad_norm": 0.7923622131347656, "learning_rate": 1.834753600478179e-05, "loss": 3.9460697174072266, "step": 10310 }, { "epoch": 0.08330440819160013, "grad_norm": 1.2085367441177368, "learning_rate": 1.834592053504358e-05, "loss": 3.6075519561767577, "step": 10320 }, { "epoch": 0.08338512951736719, "grad_norm": 1.2306585311889648, "learning_rate": 1.8344305065305366e-05, "loss": 3.435403060913086, "step": 10330 }, { "epoch": 0.08346585084313425, "grad_norm": 1.0097901821136475, "learning_rate": 1.8342689595567154e-05, "loss": 3.2834003448486326, "step": 10340 }, { "epoch": 0.0835465721689013, "grad_norm": 1.2667489051818848, "learning_rate": 1.8341074125828938e-05, "loss": 3.3383155822753907, "step": 10350 }, { "epoch": 0.08362729349466835, "grad_norm": 1.2518690824508667, "learning_rate": 1.8339458656090726e-05, "loss": 3.9253726959228517, "step": 10360 }, { "epoch": 0.08370801482043541, "grad_norm": 0.8231846690177917, "learning_rate": 1.8337843186352514e-05, "loss": 3.4369258880615234, "step": 10370 }, { "epoch": 0.08378873614620247, "grad_norm": 0.9183236360549927, "learning_rate": 1.83362277166143e-05, "loss": 3.8182609558105467, "step": 10380 }, { "epoch": 0.08386945747196951, "grad_norm": 1.561364769935608, "learning_rate": 1.8334612246876086e-05, "loss": 3.372935485839844, "step": 10390 }, { "epoch": 0.08395017879773657, "grad_norm": 0.5705371499061584, "learning_rate": 1.8332996777137873e-05, "loss": 3.9834579467773437, "step": 10400 }, { "epoch": 0.08403090012350363, "grad_norm": 1.2178584337234497, "learning_rate": 1.833138130739966e-05, "loss": 3.4878154754638673, "step": 10410 }, { "epoch": 0.08411162144927069, "grad_norm": 1.3459970951080322, "learning_rate": 1.832976583766145e-05, "loss": 3.657025146484375, "step": 10420 }, { "epoch": 0.08419234277503773, "grad_norm": 0.7970707416534424, "learning_rate": 1.8328150367923233e-05, "loss": 3.6728656768798826, "step": 10430 }, { "epoch": 0.08427306410080479, "grad_norm": 0.5849568843841553, "learning_rate": 1.832653489818502e-05, "loss": 3.7558998107910155, "step": 10440 }, { "epoch": 0.08435378542657185, "grad_norm": 1.0777479410171509, "learning_rate": 1.832491942844681e-05, "loss": 3.666646957397461, "step": 10450 }, { "epoch": 0.0844345067523389, "grad_norm": 0.8859905004501343, "learning_rate": 1.8323303958708596e-05, "loss": 3.4350486755371095, "step": 10460 }, { "epoch": 0.08451522807810595, "grad_norm": 0.5482928156852722, "learning_rate": 1.832168848897038e-05, "loss": 3.2411388397216796, "step": 10470 }, { "epoch": 0.08459594940387301, "grad_norm": 1.2332041263580322, "learning_rate": 1.8320073019232168e-05, "loss": 4.265539169311523, "step": 10480 }, { "epoch": 0.08467667072964007, "grad_norm": 1.1381769180297852, "learning_rate": 1.8318457549493956e-05, "loss": 3.838698959350586, "step": 10490 }, { "epoch": 0.08475739205540712, "grad_norm": 1.364471435546875, "learning_rate": 1.8316842079755744e-05, "loss": 3.587885284423828, "step": 10500 }, { "epoch": 0.08483811338117417, "grad_norm": 1.0041162967681885, "learning_rate": 1.8315226610017528e-05, "loss": 3.715959167480469, "step": 10510 }, { "epoch": 0.08491883470694123, "grad_norm": 1.0571085214614868, "learning_rate": 1.8313611140279316e-05, "loss": 3.5508731842041015, "step": 10520 }, { "epoch": 0.08499955603270828, "grad_norm": 1.0228803157806396, "learning_rate": 1.8311995670541103e-05, "loss": 3.2796794891357424, "step": 10530 }, { "epoch": 0.08508027735847534, "grad_norm": 0.8605148196220398, "learning_rate": 1.831038020080289e-05, "loss": 3.9805015563964843, "step": 10540 }, { "epoch": 0.08516099868424239, "grad_norm": 1.0306884050369263, "learning_rate": 1.8308764731064675e-05, "loss": 3.4275474548339844, "step": 10550 }, { "epoch": 0.08524172001000944, "grad_norm": 1.1600704193115234, "learning_rate": 1.8307149261326463e-05, "loss": 3.7159313201904296, "step": 10560 }, { "epoch": 0.0853224413357765, "grad_norm": 0.7411661744117737, "learning_rate": 1.830553379158825e-05, "loss": 3.604277801513672, "step": 10570 }, { "epoch": 0.08540316266154355, "grad_norm": 0.9244654774665833, "learning_rate": 1.8303918321850038e-05, "loss": 3.310076141357422, "step": 10580 }, { "epoch": 0.0854838839873106, "grad_norm": 1.135331392288208, "learning_rate": 1.8302302852111823e-05, "loss": 3.4245914459228515, "step": 10590 }, { "epoch": 0.08556460531307766, "grad_norm": 2.0472426414489746, "learning_rate": 1.830068738237361e-05, "loss": 3.762799072265625, "step": 10600 }, { "epoch": 0.08564532663884472, "grad_norm": 1.07703697681427, "learning_rate": 1.8299071912635398e-05, "loss": 3.1257068634033205, "step": 10610 }, { "epoch": 0.08572604796461177, "grad_norm": 1.3632314205169678, "learning_rate": 1.8297456442897186e-05, "loss": 4.153037261962891, "step": 10620 }, { "epoch": 0.08580676929037882, "grad_norm": 0.9114497303962708, "learning_rate": 1.829584097315897e-05, "loss": 3.690982437133789, "step": 10630 }, { "epoch": 0.08588749061614588, "grad_norm": 0.9186844229698181, "learning_rate": 1.829422550342076e-05, "loss": 3.87947998046875, "step": 10640 }, { "epoch": 0.08596821194191294, "grad_norm": 2.354149103164673, "learning_rate": 1.8292610033682545e-05, "loss": 3.7253822326660155, "step": 10650 }, { "epoch": 0.08604893326767998, "grad_norm": 1.3042947053909302, "learning_rate": 1.8290994563944333e-05, "loss": 3.7000045776367188, "step": 10660 }, { "epoch": 0.08612965459344704, "grad_norm": 0.7564527988433838, "learning_rate": 1.8289379094206117e-05, "loss": 3.6456253051757814, "step": 10670 }, { "epoch": 0.0862103759192141, "grad_norm": 1.523529291152954, "learning_rate": 1.828776362446791e-05, "loss": 3.6436065673828124, "step": 10680 }, { "epoch": 0.08629109724498116, "grad_norm": 0.9179992079734802, "learning_rate": 1.8286148154729693e-05, "loss": 3.836322021484375, "step": 10690 }, { "epoch": 0.0863718185707482, "grad_norm": 1.3390719890594482, "learning_rate": 1.828453268499148e-05, "loss": 3.583274078369141, "step": 10700 }, { "epoch": 0.08645253989651526, "grad_norm": 0.8909754157066345, "learning_rate": 1.8282917215253265e-05, "loss": 4.2296287536621096, "step": 10710 }, { "epoch": 0.08653326122228232, "grad_norm": 1.0042569637298584, "learning_rate": 1.8281301745515056e-05, "loss": 3.3347991943359374, "step": 10720 }, { "epoch": 0.08661398254804938, "grad_norm": 0.9659199118614197, "learning_rate": 1.827968627577684e-05, "loss": 4.083287048339844, "step": 10730 }, { "epoch": 0.08669470387381642, "grad_norm": 1.0517796277999878, "learning_rate": 1.8278070806038628e-05, "loss": 3.7630210876464845, "step": 10740 }, { "epoch": 0.08677542519958348, "grad_norm": 1.747799277305603, "learning_rate": 1.8276455336300412e-05, "loss": 4.138328933715821, "step": 10750 }, { "epoch": 0.08685614652535054, "grad_norm": 0.7849706411361694, "learning_rate": 1.8274839866562203e-05, "loss": 3.286814880371094, "step": 10760 }, { "epoch": 0.08693686785111758, "grad_norm": 0.7597224712371826, "learning_rate": 1.8273224396823988e-05, "loss": 3.425436019897461, "step": 10770 }, { "epoch": 0.08701758917688464, "grad_norm": 0.9353169798851013, "learning_rate": 1.8271608927085775e-05, "loss": 3.7492576599121095, "step": 10780 }, { "epoch": 0.0870983105026517, "grad_norm": 1.3365981578826904, "learning_rate": 1.8269993457347563e-05, "loss": 3.2967105865478517, "step": 10790 }, { "epoch": 0.08717903182841875, "grad_norm": 0.9211113452911377, "learning_rate": 1.826837798760935e-05, "loss": 3.778936004638672, "step": 10800 }, { "epoch": 0.0872597531541858, "grad_norm": 0.879108190536499, "learning_rate": 1.8266762517871135e-05, "loss": 3.2628097534179688, "step": 10810 }, { "epoch": 0.08734047447995286, "grad_norm": 1.1241729259490967, "learning_rate": 1.8265147048132923e-05, "loss": 3.805898666381836, "step": 10820 }, { "epoch": 0.08742119580571991, "grad_norm": 1.0081367492675781, "learning_rate": 1.826353157839471e-05, "loss": 3.2460399627685548, "step": 10830 }, { "epoch": 0.08750191713148697, "grad_norm": 1.3666208982467651, "learning_rate": 1.8261916108656498e-05, "loss": 3.6295719146728516, "step": 10840 }, { "epoch": 0.08758263845725402, "grad_norm": 1.03868567943573, "learning_rate": 1.8260300638918282e-05, "loss": 3.9721958160400392, "step": 10850 }, { "epoch": 0.08766335978302107, "grad_norm": 2.828416347503662, "learning_rate": 1.825868516918007e-05, "loss": 4.042898559570313, "step": 10860 }, { "epoch": 0.08774408110878813, "grad_norm": 0.6701147556304932, "learning_rate": 1.8257069699441858e-05, "loss": 3.3634365081787108, "step": 10870 }, { "epoch": 0.08782480243455519, "grad_norm": 1.5067558288574219, "learning_rate": 1.8255454229703645e-05, "loss": 4.142113113403321, "step": 10880 }, { "epoch": 0.08790552376032224, "grad_norm": 1.0425832271575928, "learning_rate": 1.825383875996543e-05, "loss": 3.2512950897216797, "step": 10890 }, { "epoch": 0.0879862450860893, "grad_norm": 0.7855437994003296, "learning_rate": 1.8252223290227217e-05, "loss": 3.5104496002197267, "step": 10900 }, { "epoch": 0.08806696641185635, "grad_norm": 1.0242457389831543, "learning_rate": 1.8250607820489005e-05, "loss": 4.3557392120361325, "step": 10910 }, { "epoch": 0.08814768773762341, "grad_norm": 2.5469939708709717, "learning_rate": 1.8248992350750793e-05, "loss": 3.9515731811523436, "step": 10920 }, { "epoch": 0.08822840906339045, "grad_norm": 1.4390970468521118, "learning_rate": 1.8247376881012577e-05, "loss": 4.698971557617187, "step": 10930 }, { "epoch": 0.08830913038915751, "grad_norm": 1.2110254764556885, "learning_rate": 1.8245761411274365e-05, "loss": 3.732004165649414, "step": 10940 }, { "epoch": 0.08838985171492457, "grad_norm": 1.0607656240463257, "learning_rate": 1.8244145941536152e-05, "loss": 3.2514469146728517, "step": 10950 }, { "epoch": 0.08847057304069161, "grad_norm": 1.2257949113845825, "learning_rate": 1.824253047179794e-05, "loss": 4.051048278808594, "step": 10960 }, { "epoch": 0.08855129436645867, "grad_norm": 1.1001522541046143, "learning_rate": 1.8240915002059724e-05, "loss": 3.9316314697265624, "step": 10970 }, { "epoch": 0.08863201569222573, "grad_norm": 2.8858096599578857, "learning_rate": 1.8239299532321512e-05, "loss": 3.501503753662109, "step": 10980 }, { "epoch": 0.08871273701799279, "grad_norm": 1.1206493377685547, "learning_rate": 1.82376840625833e-05, "loss": 3.7549118041992187, "step": 10990 }, { "epoch": 0.08879345834375983, "grad_norm": 0.8999654054641724, "learning_rate": 1.8236068592845088e-05, "loss": 3.5436901092529296, "step": 11000 }, { "epoch": 0.08887417966952689, "grad_norm": 1.2932820320129395, "learning_rate": 1.8234453123106872e-05, "loss": 3.513412094116211, "step": 11010 }, { "epoch": 0.08895490099529395, "grad_norm": 1.06937837600708, "learning_rate": 1.823283765336866e-05, "loss": 3.8261985778808594, "step": 11020 }, { "epoch": 0.089035622321061, "grad_norm": 1.2786813974380493, "learning_rate": 1.8231222183630447e-05, "loss": 3.5783214569091797, "step": 11030 }, { "epoch": 0.08911634364682805, "grad_norm": 1.0038374662399292, "learning_rate": 1.8229606713892235e-05, "loss": 3.2526702880859375, "step": 11040 }, { "epoch": 0.08919706497259511, "grad_norm": 0.9138091206550598, "learning_rate": 1.822799124415402e-05, "loss": 3.4832054138183595, "step": 11050 }, { "epoch": 0.08927778629836217, "grad_norm": 1.4649887084960938, "learning_rate": 1.8226375774415807e-05, "loss": 3.297486114501953, "step": 11060 }, { "epoch": 0.08935850762412922, "grad_norm": 1.9420076608657837, "learning_rate": 1.8224760304677595e-05, "loss": 3.2390838623046876, "step": 11070 }, { "epoch": 0.08943922894989627, "grad_norm": 0.8753891587257385, "learning_rate": 1.8223144834939382e-05, "loss": 3.7441959381103516, "step": 11080 }, { "epoch": 0.08951995027566333, "grad_norm": 0.9034563302993774, "learning_rate": 1.8221529365201167e-05, "loss": 3.7124313354492187, "step": 11090 }, { "epoch": 0.08960067160143038, "grad_norm": 1.0981321334838867, "learning_rate": 1.8219913895462954e-05, "loss": 3.5218738555908202, "step": 11100 }, { "epoch": 0.08968139292719744, "grad_norm": 1.0882701873779297, "learning_rate": 1.8218298425724742e-05, "loss": 3.665557861328125, "step": 11110 }, { "epoch": 0.08976211425296449, "grad_norm": 1.7777496576309204, "learning_rate": 1.821668295598653e-05, "loss": 3.6902000427246096, "step": 11120 }, { "epoch": 0.08984283557873154, "grad_norm": 0.8283280730247498, "learning_rate": 1.8215067486248314e-05, "loss": 3.602320098876953, "step": 11130 }, { "epoch": 0.0899235569044986, "grad_norm": 0.9510522484779358, "learning_rate": 1.82134520165101e-05, "loss": 3.682831573486328, "step": 11140 }, { "epoch": 0.09000427823026566, "grad_norm": 0.7453126907348633, "learning_rate": 1.821183654677189e-05, "loss": 3.7969692230224608, "step": 11150 }, { "epoch": 0.0900849995560327, "grad_norm": 1.2896671295166016, "learning_rate": 1.8210221077033677e-05, "loss": 3.593307876586914, "step": 11160 }, { "epoch": 0.09016572088179976, "grad_norm": 1.1757994890213013, "learning_rate": 1.820860560729546e-05, "loss": 3.319280242919922, "step": 11170 }, { "epoch": 0.09024644220756682, "grad_norm": 1.1723276376724243, "learning_rate": 1.820699013755725e-05, "loss": 3.677831268310547, "step": 11180 }, { "epoch": 0.09032716353333387, "grad_norm": 1.4228602647781372, "learning_rate": 1.8205374667819037e-05, "loss": 3.729094314575195, "step": 11190 }, { "epoch": 0.09040788485910092, "grad_norm": 1.153214931488037, "learning_rate": 1.8203759198080824e-05, "loss": 3.510776138305664, "step": 11200 }, { "epoch": 0.09048860618486798, "grad_norm": 0.9927562475204468, "learning_rate": 1.820214372834261e-05, "loss": 3.846516799926758, "step": 11210 }, { "epoch": 0.09056932751063504, "grad_norm": 1.2238807678222656, "learning_rate": 1.8200528258604396e-05, "loss": 3.529192352294922, "step": 11220 }, { "epoch": 0.09065004883640208, "grad_norm": 1.0506196022033691, "learning_rate": 1.8198912788866184e-05, "loss": 3.4943904876708984, "step": 11230 }, { "epoch": 0.09073077016216914, "grad_norm": 1.0642677545547485, "learning_rate": 1.8197297319127972e-05, "loss": 3.568267822265625, "step": 11240 }, { "epoch": 0.0908114914879362, "grad_norm": 1.1114660501480103, "learning_rate": 1.8195681849389756e-05, "loss": 4.011236190795898, "step": 11250 }, { "epoch": 0.09089221281370326, "grad_norm": 0.9316179156303406, "learning_rate": 1.8194066379651544e-05, "loss": 3.535223388671875, "step": 11260 }, { "epoch": 0.0909729341394703, "grad_norm": 1.882612943649292, "learning_rate": 1.819245090991333e-05, "loss": 4.011322402954102, "step": 11270 }, { "epoch": 0.09105365546523736, "grad_norm": 0.7391141653060913, "learning_rate": 1.819083544017512e-05, "loss": 3.4059009552001953, "step": 11280 }, { "epoch": 0.09113437679100442, "grad_norm": 1.2588123083114624, "learning_rate": 1.8189219970436903e-05, "loss": 3.5163509368896486, "step": 11290 }, { "epoch": 0.09121509811677148, "grad_norm": 0.8295601606369019, "learning_rate": 1.818760450069869e-05, "loss": 3.7106399536132812, "step": 11300 }, { "epoch": 0.09129581944253852, "grad_norm": 0.644668698310852, "learning_rate": 1.818598903096048e-05, "loss": 3.264712905883789, "step": 11310 }, { "epoch": 0.09137654076830558, "grad_norm": 0.9316566586494446, "learning_rate": 1.8184373561222267e-05, "loss": 3.6843753814697267, "step": 11320 }, { "epoch": 0.09145726209407264, "grad_norm": 1.6180671453475952, "learning_rate": 1.818275809148405e-05, "loss": 3.3212799072265624, "step": 11330 }, { "epoch": 0.0915379834198397, "grad_norm": 1.4468268156051636, "learning_rate": 1.818114262174584e-05, "loss": 3.6254650115966798, "step": 11340 }, { "epoch": 0.09161870474560674, "grad_norm": 0.9562880396842957, "learning_rate": 1.8179527152007626e-05, "loss": 3.161657524108887, "step": 11350 }, { "epoch": 0.0916994260713738, "grad_norm": 1.2517825365066528, "learning_rate": 1.8177911682269414e-05, "loss": 3.8713058471679687, "step": 11360 }, { "epoch": 0.09178014739714085, "grad_norm": 0.8511696457862854, "learning_rate": 1.8176296212531198e-05, "loss": 3.7284645080566405, "step": 11370 }, { "epoch": 0.0918608687229079, "grad_norm": 2.104182243347168, "learning_rate": 1.8174680742792986e-05, "loss": 4.007300186157226, "step": 11380 }, { "epoch": 0.09194159004867496, "grad_norm": 0.8354575037956238, "learning_rate": 1.8173065273054774e-05, "loss": 3.809981918334961, "step": 11390 }, { "epoch": 0.09202231137444201, "grad_norm": 0.9994162321090698, "learning_rate": 1.817144980331656e-05, "loss": 4.679251098632813, "step": 11400 }, { "epoch": 0.09210303270020907, "grad_norm": 0.7664873600006104, "learning_rate": 1.8169834333578346e-05, "loss": 3.3642505645751952, "step": 11410 }, { "epoch": 0.09218375402597612, "grad_norm": 0.839701771736145, "learning_rate": 1.8168218863840133e-05, "loss": 3.533351516723633, "step": 11420 }, { "epoch": 0.09226447535174317, "grad_norm": 1.1071995496749878, "learning_rate": 1.816660339410192e-05, "loss": 3.3982162475585938, "step": 11430 }, { "epoch": 0.09234519667751023, "grad_norm": 1.0398160219192505, "learning_rate": 1.816498792436371e-05, "loss": 3.8507205963134767, "step": 11440 }, { "epoch": 0.09242591800327729, "grad_norm": 0.6967927813529968, "learning_rate": 1.8163372454625493e-05, "loss": 3.278018569946289, "step": 11450 }, { "epoch": 0.09250663932904434, "grad_norm": 2.968604564666748, "learning_rate": 1.816175698488728e-05, "loss": 3.556266021728516, "step": 11460 }, { "epoch": 0.0925873606548114, "grad_norm": 0.7451035380363464, "learning_rate": 1.816014151514907e-05, "loss": 3.210115432739258, "step": 11470 }, { "epoch": 0.09266808198057845, "grad_norm": 1.220066785812378, "learning_rate": 1.8158526045410856e-05, "loss": 4.0682838439941404, "step": 11480 }, { "epoch": 0.09274880330634551, "grad_norm": 0.5855391025543213, "learning_rate": 1.815691057567264e-05, "loss": 3.751372146606445, "step": 11490 }, { "epoch": 0.09282952463211255, "grad_norm": 0.7368848919868469, "learning_rate": 1.8155295105934428e-05, "loss": 3.915380859375, "step": 11500 }, { "epoch": 0.09291024595787961, "grad_norm": 1.3051668405532837, "learning_rate": 1.815367963619622e-05, "loss": 3.6111499786376955, "step": 11510 }, { "epoch": 0.09299096728364667, "grad_norm": 1.2189385890960693, "learning_rate": 1.8152064166458003e-05, "loss": 3.8187198638916016, "step": 11520 }, { "epoch": 0.09307168860941373, "grad_norm": 1.1476466655731201, "learning_rate": 1.815044869671979e-05, "loss": 3.60841064453125, "step": 11530 }, { "epoch": 0.09315240993518077, "grad_norm": 0.9930242896080017, "learning_rate": 1.8148833226981575e-05, "loss": 4.105327987670899, "step": 11540 }, { "epoch": 0.09323313126094783, "grad_norm": 1.0417311191558838, "learning_rate": 1.8147217757243367e-05, "loss": 3.8715129852294923, "step": 11550 }, { "epoch": 0.09331385258671489, "grad_norm": 0.7688308954238892, "learning_rate": 1.814560228750515e-05, "loss": 3.9294776916503906, "step": 11560 }, { "epoch": 0.09339457391248193, "grad_norm": 1.2368069887161255, "learning_rate": 1.814398681776694e-05, "loss": 3.6924705505371094, "step": 11570 }, { "epoch": 0.09347529523824899, "grad_norm": 0.9651888012886047, "learning_rate": 1.8142371348028723e-05, "loss": 3.542555236816406, "step": 11580 }, { "epoch": 0.09355601656401605, "grad_norm": 1.091500997543335, "learning_rate": 1.8140755878290514e-05, "loss": 3.877598190307617, "step": 11590 }, { "epoch": 0.0936367378897831, "grad_norm": 1.2506455183029175, "learning_rate": 1.8139140408552298e-05, "loss": 3.6668773651123048, "step": 11600 }, { "epoch": 0.09371745921555015, "grad_norm": 1.08695387840271, "learning_rate": 1.8137524938814086e-05, "loss": 3.711635971069336, "step": 11610 }, { "epoch": 0.09379818054131721, "grad_norm": 0.8176360726356506, "learning_rate": 1.813590946907587e-05, "loss": 3.5141815185546874, "step": 11620 }, { "epoch": 0.09387890186708427, "grad_norm": 1.007534146308899, "learning_rate": 1.813429399933766e-05, "loss": 3.643154525756836, "step": 11630 }, { "epoch": 0.09395962319285132, "grad_norm": 1.174790620803833, "learning_rate": 1.8132678529599446e-05, "loss": 3.7000579833984375, "step": 11640 }, { "epoch": 0.09404034451861837, "grad_norm": 0.8877544403076172, "learning_rate": 1.8131063059861233e-05, "loss": 3.602005386352539, "step": 11650 }, { "epoch": 0.09412106584438543, "grad_norm": 1.2592169046401978, "learning_rate": 1.812944759012302e-05, "loss": 3.4719120025634767, "step": 11660 }, { "epoch": 0.09420178717015248, "grad_norm": 1.5691958665847778, "learning_rate": 1.812783212038481e-05, "loss": 3.7869659423828126, "step": 11670 }, { "epoch": 0.09428250849591954, "grad_norm": 0.7366216778755188, "learning_rate": 1.8126216650646593e-05, "loss": 3.946427917480469, "step": 11680 }, { "epoch": 0.09436322982168659, "grad_norm": 1.4160003662109375, "learning_rate": 1.812460118090838e-05, "loss": 3.5431652069091797, "step": 11690 }, { "epoch": 0.09444395114745364, "grad_norm": 0.9394986033439636, "learning_rate": 1.812298571117017e-05, "loss": 3.229570007324219, "step": 11700 }, { "epoch": 0.0945246724732207, "grad_norm": 1.0107370615005493, "learning_rate": 1.8121370241431956e-05, "loss": 3.3778846740722654, "step": 11710 }, { "epoch": 0.09460539379898776, "grad_norm": 1.3958197832107544, "learning_rate": 1.811975477169374e-05, "loss": 3.472262954711914, "step": 11720 }, { "epoch": 0.0946861151247548, "grad_norm": 1.3309992551803589, "learning_rate": 1.8118139301955528e-05, "loss": 3.711239242553711, "step": 11730 }, { "epoch": 0.09476683645052186, "grad_norm": 0.9059507846832275, "learning_rate": 1.8116523832217316e-05, "loss": 3.637379837036133, "step": 11740 }, { "epoch": 0.09484755777628892, "grad_norm": 1.3212283849716187, "learning_rate": 1.8114908362479103e-05, "loss": 3.774494171142578, "step": 11750 }, { "epoch": 0.09492827910205598, "grad_norm": 1.1436715126037598, "learning_rate": 1.8113292892740888e-05, "loss": 3.287090301513672, "step": 11760 }, { "epoch": 0.09500900042782302, "grad_norm": 0.9125611186027527, "learning_rate": 1.8111677423002675e-05, "loss": 3.456620788574219, "step": 11770 }, { "epoch": 0.09508972175359008, "grad_norm": 1.0789382457733154, "learning_rate": 1.8110061953264463e-05, "loss": 3.267046356201172, "step": 11780 }, { "epoch": 0.09517044307935714, "grad_norm": 0.8446670770645142, "learning_rate": 1.810844648352625e-05, "loss": 3.151365280151367, "step": 11790 }, { "epoch": 0.09525116440512418, "grad_norm": 0.9821216464042664, "learning_rate": 1.8106831013788035e-05, "loss": 3.3328624725341798, "step": 11800 }, { "epoch": 0.09533188573089124, "grad_norm": 1.0167367458343506, "learning_rate": 1.8105215544049823e-05, "loss": 4.195100402832031, "step": 11810 }, { "epoch": 0.0954126070566583, "grad_norm": 1.0052990913391113, "learning_rate": 1.810360007431161e-05, "loss": 3.7066131591796876, "step": 11820 }, { "epoch": 0.09549332838242536, "grad_norm": 0.7609302401542664, "learning_rate": 1.8101984604573398e-05, "loss": 3.4074920654296874, "step": 11830 }, { "epoch": 0.0955740497081924, "grad_norm": 1.0933443307876587, "learning_rate": 1.8100369134835183e-05, "loss": 3.4646251678466795, "step": 11840 }, { "epoch": 0.09565477103395946, "grad_norm": 1.1027852296829224, "learning_rate": 1.809875366509697e-05, "loss": 3.4739967346191407, "step": 11850 }, { "epoch": 0.09573549235972652, "grad_norm": 0.9603446125984192, "learning_rate": 1.8097138195358758e-05, "loss": 3.419361877441406, "step": 11860 }, { "epoch": 0.09581621368549358, "grad_norm": 1.2277381420135498, "learning_rate": 1.8095522725620546e-05, "loss": 3.683431625366211, "step": 11870 }, { "epoch": 0.09589693501126062, "grad_norm": 1.2442787885665894, "learning_rate": 1.809390725588233e-05, "loss": 3.1430889129638673, "step": 11880 }, { "epoch": 0.09597765633702768, "grad_norm": 1.1618796586990356, "learning_rate": 1.8092291786144118e-05, "loss": 3.267678451538086, "step": 11890 }, { "epoch": 0.09605837766279474, "grad_norm": 0.9339056015014648, "learning_rate": 1.8090676316405905e-05, "loss": 3.332570266723633, "step": 11900 }, { "epoch": 0.0961390989885618, "grad_norm": 0.6357445120811462, "learning_rate": 1.8089060846667693e-05, "loss": 3.920181655883789, "step": 11910 }, { "epoch": 0.09621982031432884, "grad_norm": 0.9835296869277954, "learning_rate": 1.8087445376929477e-05, "loss": 4.321204757690429, "step": 11920 }, { "epoch": 0.0963005416400959, "grad_norm": 0.9126607775688171, "learning_rate": 1.8085829907191265e-05, "loss": 3.8370906829833986, "step": 11930 }, { "epoch": 0.09638126296586295, "grad_norm": 0.9461343884468079, "learning_rate": 1.8084214437453053e-05, "loss": 4.197310256958008, "step": 11940 }, { "epoch": 0.09646198429163001, "grad_norm": 1.1659879684448242, "learning_rate": 1.808259896771484e-05, "loss": 3.2295486450195314, "step": 11950 }, { "epoch": 0.09654270561739706, "grad_norm": 1.0380218029022217, "learning_rate": 1.8080983497976625e-05, "loss": 3.45560302734375, "step": 11960 }, { "epoch": 0.09662342694316411, "grad_norm": 1.4435161352157593, "learning_rate": 1.8079368028238412e-05, "loss": 4.046021270751953, "step": 11970 }, { "epoch": 0.09670414826893117, "grad_norm": 1.2306809425354004, "learning_rate": 1.80777525585002e-05, "loss": 3.294040298461914, "step": 11980 }, { "epoch": 0.09678486959469822, "grad_norm": 0.7826406955718994, "learning_rate": 1.8076137088761988e-05, "loss": 3.9556838989257814, "step": 11990 }, { "epoch": 0.09686559092046527, "grad_norm": 1.3186484575271606, "learning_rate": 1.8074521619023772e-05, "loss": 3.690155029296875, "step": 12000 }, { "epoch": 0.09694631224623233, "grad_norm": 1.8475478887557983, "learning_rate": 1.807290614928556e-05, "loss": 3.8257862091064454, "step": 12010 }, { "epoch": 0.09702703357199939, "grad_norm": 0.7306244373321533, "learning_rate": 1.8071290679547347e-05, "loss": 3.4504379272460937, "step": 12020 }, { "epoch": 0.09710775489776644, "grad_norm": 1.292966365814209, "learning_rate": 1.8069675209809135e-05, "loss": 3.5567493438720703, "step": 12030 }, { "epoch": 0.0971884762235335, "grad_norm": 1.018592119216919, "learning_rate": 1.806805974007092e-05, "loss": 3.772835540771484, "step": 12040 }, { "epoch": 0.09726919754930055, "grad_norm": 1.178733229637146, "learning_rate": 1.8066444270332707e-05, "loss": 3.946982192993164, "step": 12050 }, { "epoch": 0.09734991887506761, "grad_norm": 1.0190784931182861, "learning_rate": 1.8064828800594495e-05, "loss": 3.566318130493164, "step": 12060 }, { "epoch": 0.09743064020083465, "grad_norm": 0.903897762298584, "learning_rate": 1.8063213330856283e-05, "loss": 3.624649429321289, "step": 12070 }, { "epoch": 0.09751136152660171, "grad_norm": 0.8109442591667175, "learning_rate": 1.8061597861118067e-05, "loss": 4.242225265502929, "step": 12080 }, { "epoch": 0.09759208285236877, "grad_norm": 0.6779379844665527, "learning_rate": 1.8059982391379855e-05, "loss": 3.3709869384765625, "step": 12090 }, { "epoch": 0.09767280417813583, "grad_norm": 0.9223534464836121, "learning_rate": 1.8058366921641642e-05, "loss": 3.308393859863281, "step": 12100 }, { "epoch": 0.09775352550390287, "grad_norm": 0.9303340315818787, "learning_rate": 1.805675145190343e-05, "loss": 3.2115142822265623, "step": 12110 }, { "epoch": 0.09783424682966993, "grad_norm": 1.1176214218139648, "learning_rate": 1.8055135982165214e-05, "loss": 4.331555557250977, "step": 12120 }, { "epoch": 0.09791496815543699, "grad_norm": 0.8181800842285156, "learning_rate": 1.8053520512427002e-05, "loss": 3.4917224884033202, "step": 12130 }, { "epoch": 0.09799568948120405, "grad_norm": 1.247480869293213, "learning_rate": 1.805190504268879e-05, "loss": 3.599603271484375, "step": 12140 }, { "epoch": 0.09807641080697109, "grad_norm": 0.642316997051239, "learning_rate": 1.8050289572950577e-05, "loss": 3.5888668060302735, "step": 12150 }, { "epoch": 0.09815713213273815, "grad_norm": 1.0359461307525635, "learning_rate": 1.804867410321236e-05, "loss": 3.829515075683594, "step": 12160 }, { "epoch": 0.0982378534585052, "grad_norm": 0.9412827491760254, "learning_rate": 1.804705863347415e-05, "loss": 3.6381893157958984, "step": 12170 }, { "epoch": 0.09831857478427225, "grad_norm": 1.2299392223358154, "learning_rate": 1.8045443163735937e-05, "loss": 3.855612564086914, "step": 12180 }, { "epoch": 0.09839929611003931, "grad_norm": 0.9671002626419067, "learning_rate": 1.8043827693997725e-05, "loss": 3.500125503540039, "step": 12190 }, { "epoch": 0.09848001743580637, "grad_norm": 1.327741265296936, "learning_rate": 1.804221222425951e-05, "loss": 3.1665670394897463, "step": 12200 }, { "epoch": 0.09856073876157342, "grad_norm": 0.722423255443573, "learning_rate": 1.8040596754521297e-05, "loss": 3.635148620605469, "step": 12210 }, { "epoch": 0.09864146008734047, "grad_norm": 0.9775083661079407, "learning_rate": 1.8038981284783084e-05, "loss": 3.399690628051758, "step": 12220 }, { "epoch": 0.09872218141310753, "grad_norm": 4.357423782348633, "learning_rate": 1.8037365815044872e-05, "loss": 3.530498504638672, "step": 12230 }, { "epoch": 0.09880290273887458, "grad_norm": 1.096701979637146, "learning_rate": 1.8035750345306656e-05, "loss": 3.8114505767822267, "step": 12240 }, { "epoch": 0.09888362406464164, "grad_norm": 0.8033063411712646, "learning_rate": 1.8034134875568444e-05, "loss": 3.4600494384765623, "step": 12250 }, { "epoch": 0.09896434539040869, "grad_norm": 1.6165231466293335, "learning_rate": 1.8032519405830232e-05, "loss": 3.249965286254883, "step": 12260 }, { "epoch": 0.09904506671617574, "grad_norm": 1.2225579023361206, "learning_rate": 1.803090393609202e-05, "loss": 3.658645248413086, "step": 12270 }, { "epoch": 0.0991257880419428, "grad_norm": 2.563697576522827, "learning_rate": 1.8029288466353804e-05, "loss": 3.9394863128662108, "step": 12280 }, { "epoch": 0.09920650936770986, "grad_norm": 0.972898542881012, "learning_rate": 1.802767299661559e-05, "loss": 3.460674285888672, "step": 12290 }, { "epoch": 0.0992872306934769, "grad_norm": 1.061221957206726, "learning_rate": 1.802605752687738e-05, "loss": 3.5927761077880858, "step": 12300 }, { "epoch": 0.09936795201924396, "grad_norm": 2.2292864322662354, "learning_rate": 1.8024442057139167e-05, "loss": 3.8500782012939454, "step": 12310 }, { "epoch": 0.09944867334501102, "grad_norm": 0.6454566121101379, "learning_rate": 1.802282658740095e-05, "loss": 3.2876235961914064, "step": 12320 }, { "epoch": 0.09952939467077808, "grad_norm": 1.0748026371002197, "learning_rate": 1.802121111766274e-05, "loss": 3.6493865966796877, "step": 12330 }, { "epoch": 0.09961011599654512, "grad_norm": 1.3053230047225952, "learning_rate": 1.8019595647924527e-05, "loss": 3.580770492553711, "step": 12340 }, { "epoch": 0.09969083732231218, "grad_norm": 1.1784616708755493, "learning_rate": 1.8017980178186314e-05, "loss": 3.3881046295166017, "step": 12350 }, { "epoch": 0.09977155864807924, "grad_norm": 1.4195245504379272, "learning_rate": 1.80163647084481e-05, "loss": 3.7371448516845702, "step": 12360 }, { "epoch": 0.0998522799738463, "grad_norm": 1.1630561351776123, "learning_rate": 1.8014749238709886e-05, "loss": 3.2813556671142576, "step": 12370 }, { "epoch": 0.09993300129961334, "grad_norm": 1.0936247110366821, "learning_rate": 1.8013133768971674e-05, "loss": 3.3159561157226562, "step": 12380 }, { "epoch": 0.1000137226253804, "grad_norm": 1.3683481216430664, "learning_rate": 1.801151829923346e-05, "loss": 3.907678985595703, "step": 12390 }, { "epoch": 0.10009444395114746, "grad_norm": 0.8253676295280457, "learning_rate": 1.8009902829495246e-05, "loss": 3.5162593841552736, "step": 12400 }, { "epoch": 0.1001751652769145, "grad_norm": 1.0101436376571655, "learning_rate": 1.8008287359757034e-05, "loss": 3.4304573059082033, "step": 12410 }, { "epoch": 0.10025588660268156, "grad_norm": 1.0860414505004883, "learning_rate": 1.800667189001882e-05, "loss": 4.2901054382324215, "step": 12420 }, { "epoch": 0.10033660792844862, "grad_norm": 1.1413674354553223, "learning_rate": 1.800505642028061e-05, "loss": 3.6561607360839843, "step": 12430 }, { "epoch": 0.10041732925421568, "grad_norm": 1.076982021331787, "learning_rate": 1.8003440950542393e-05, "loss": 3.4415306091308593, "step": 12440 }, { "epoch": 0.10049805057998272, "grad_norm": 0.5811699032783508, "learning_rate": 1.800182548080418e-05, "loss": 3.476103591918945, "step": 12450 }, { "epoch": 0.10057877190574978, "grad_norm": 0.5615705847740173, "learning_rate": 1.800021001106597e-05, "loss": 3.300608444213867, "step": 12460 }, { "epoch": 0.10065949323151684, "grad_norm": 0.8273527026176453, "learning_rate": 1.7998594541327756e-05, "loss": 3.0663867950439454, "step": 12470 }, { "epoch": 0.1007402145572839, "grad_norm": 0.9312610030174255, "learning_rate": 1.799697907158954e-05, "loss": 4.072696685791016, "step": 12480 }, { "epoch": 0.10082093588305094, "grad_norm": 0.9724265933036804, "learning_rate": 1.799536360185133e-05, "loss": 3.189296913146973, "step": 12490 }, { "epoch": 0.100901657208818, "grad_norm": 0.9111788272857666, "learning_rate": 1.7993748132113116e-05, "loss": 3.579610061645508, "step": 12500 }, { "epoch": 0.10098237853458505, "grad_norm": 0.7523284554481506, "learning_rate": 1.7992132662374904e-05, "loss": 3.7460437774658204, "step": 12510 }, { "epoch": 0.10106309986035211, "grad_norm": 0.6842015981674194, "learning_rate": 1.7990517192636688e-05, "loss": 3.7807029724121093, "step": 12520 }, { "epoch": 0.10114382118611916, "grad_norm": 1.508571743965149, "learning_rate": 1.798890172289848e-05, "loss": 3.4455032348632812, "step": 12530 }, { "epoch": 0.10122454251188621, "grad_norm": 0.9267526865005493, "learning_rate": 1.7987286253160263e-05, "loss": 3.5818603515625, "step": 12540 }, { "epoch": 0.10130526383765327, "grad_norm": 0.9739537239074707, "learning_rate": 1.798567078342205e-05, "loss": 3.156211090087891, "step": 12550 }, { "epoch": 0.10138598516342033, "grad_norm": 0.9585102200508118, "learning_rate": 1.7984055313683835e-05, "loss": 3.2408000946044924, "step": 12560 }, { "epoch": 0.10146670648918737, "grad_norm": 1.0447386503219604, "learning_rate": 1.7982439843945627e-05, "loss": 3.7438591003417967, "step": 12570 }, { "epoch": 0.10154742781495443, "grad_norm": 0.7015555500984192, "learning_rate": 1.798082437420741e-05, "loss": 3.3674720764160155, "step": 12580 }, { "epoch": 0.10162814914072149, "grad_norm": 1.3484022617340088, "learning_rate": 1.79792089044692e-05, "loss": 3.2569942474365234, "step": 12590 }, { "epoch": 0.10170887046648854, "grad_norm": 0.5084355473518372, "learning_rate": 1.7977593434730983e-05, "loss": 3.9629562377929686, "step": 12600 }, { "epoch": 0.1017895917922556, "grad_norm": 1.0558021068572998, "learning_rate": 1.7975977964992774e-05, "loss": 3.6594253540039063, "step": 12610 }, { "epoch": 0.10187031311802265, "grad_norm": 1.270875096321106, "learning_rate": 1.7974362495254558e-05, "loss": 3.580750274658203, "step": 12620 }, { "epoch": 0.10195103444378971, "grad_norm": 1.0660933256149292, "learning_rate": 1.7972747025516346e-05, "loss": 3.4762908935546877, "step": 12630 }, { "epoch": 0.10203175576955675, "grad_norm": 0.6470518112182617, "learning_rate": 1.797113155577813e-05, "loss": 3.4842124938964845, "step": 12640 }, { "epoch": 0.10211247709532381, "grad_norm": 1.3386210203170776, "learning_rate": 1.796951608603992e-05, "loss": 3.2577144622802736, "step": 12650 }, { "epoch": 0.10219319842109087, "grad_norm": 0.8257508873939514, "learning_rate": 1.7967900616301706e-05, "loss": 3.2904430389404298, "step": 12660 }, { "epoch": 0.10227391974685793, "grad_norm": 1.2216041088104248, "learning_rate": 1.7966285146563493e-05, "loss": 3.2183761596679688, "step": 12670 }, { "epoch": 0.10235464107262497, "grad_norm": 1.0638867616653442, "learning_rate": 1.796466967682528e-05, "loss": 2.852990913391113, "step": 12680 }, { "epoch": 0.10243536239839203, "grad_norm": 1.1901792287826538, "learning_rate": 1.796305420708707e-05, "loss": 3.2936031341552736, "step": 12690 }, { "epoch": 0.10251608372415909, "grad_norm": 2.5465152263641357, "learning_rate": 1.7961438737348853e-05, "loss": 3.8625568389892577, "step": 12700 }, { "epoch": 0.10259680504992615, "grad_norm": 0.8070170283317566, "learning_rate": 1.795982326761064e-05, "loss": 3.4957141876220703, "step": 12710 }, { "epoch": 0.10267752637569319, "grad_norm": 1.0476828813552856, "learning_rate": 1.795820779787243e-05, "loss": 3.7476646423339846, "step": 12720 }, { "epoch": 0.10275824770146025, "grad_norm": 1.1146886348724365, "learning_rate": 1.7956592328134216e-05, "loss": 3.8017601013183593, "step": 12730 }, { "epoch": 0.1028389690272273, "grad_norm": 1.0728363990783691, "learning_rate": 1.7954976858396e-05, "loss": 3.4268238067626955, "step": 12740 }, { "epoch": 0.10291969035299436, "grad_norm": 1.4404020309448242, "learning_rate": 1.7953361388657788e-05, "loss": 3.093855857849121, "step": 12750 }, { "epoch": 0.10300041167876141, "grad_norm": 0.9750802516937256, "learning_rate": 1.7951745918919576e-05, "loss": 3.7008724212646484, "step": 12760 }, { "epoch": 0.10308113300452847, "grad_norm": 0.9434695243835449, "learning_rate": 1.7950130449181363e-05, "loss": 3.421401596069336, "step": 12770 }, { "epoch": 0.10316185433029552, "grad_norm": 1.037392020225525, "learning_rate": 1.794851497944315e-05, "loss": 3.2837696075439453, "step": 12780 }, { "epoch": 0.10324257565606257, "grad_norm": 0.947490930557251, "learning_rate": 1.7946899509704935e-05, "loss": 3.7100887298583984, "step": 12790 }, { "epoch": 0.10332329698182963, "grad_norm": 1.2106616497039795, "learning_rate": 1.7945284039966723e-05, "loss": 3.778707504272461, "step": 12800 }, { "epoch": 0.10340401830759668, "grad_norm": 0.7160183787345886, "learning_rate": 1.794366857022851e-05, "loss": 3.3824394226074217, "step": 12810 }, { "epoch": 0.10348473963336374, "grad_norm": 0.9529709815979004, "learning_rate": 1.79420531004903e-05, "loss": 3.50540771484375, "step": 12820 }, { "epoch": 0.10356546095913079, "grad_norm": 1.1965309381484985, "learning_rate": 1.7940437630752083e-05, "loss": 3.4979202270507814, "step": 12830 }, { "epoch": 0.10364618228489784, "grad_norm": 0.9265314340591431, "learning_rate": 1.793882216101387e-05, "loss": 3.044690704345703, "step": 12840 }, { "epoch": 0.1037269036106649, "grad_norm": 1.3740506172180176, "learning_rate": 1.7937206691275658e-05, "loss": 3.374391555786133, "step": 12850 }, { "epoch": 0.10380762493643196, "grad_norm": 1.237956166267395, "learning_rate": 1.7935591221537446e-05, "loss": 3.2593082427978515, "step": 12860 }, { "epoch": 0.103888346262199, "grad_norm": 0.8319298028945923, "learning_rate": 1.793397575179923e-05, "loss": 3.408283233642578, "step": 12870 }, { "epoch": 0.10396906758796606, "grad_norm": 1.1922612190246582, "learning_rate": 1.7932360282061018e-05, "loss": 3.7153564453125, "step": 12880 }, { "epoch": 0.10404978891373312, "grad_norm": 0.7796982526779175, "learning_rate": 1.7930744812322806e-05, "loss": 3.3944557189941404, "step": 12890 }, { "epoch": 0.10413051023950018, "grad_norm": 1.4002598524093628, "learning_rate": 1.7929129342584593e-05, "loss": 3.487334060668945, "step": 12900 }, { "epoch": 0.10421123156526722, "grad_norm": 1.2975265979766846, "learning_rate": 1.7927513872846378e-05, "loss": 3.6901031494140626, "step": 12910 }, { "epoch": 0.10429195289103428, "grad_norm": 1.274964451789856, "learning_rate": 1.7925898403108165e-05, "loss": 3.6497573852539062, "step": 12920 }, { "epoch": 0.10437267421680134, "grad_norm": 1.1435813903808594, "learning_rate": 1.7924282933369953e-05, "loss": 3.5726783752441404, "step": 12930 }, { "epoch": 0.1044533955425684, "grad_norm": 0.8114719986915588, "learning_rate": 1.792266746363174e-05, "loss": 4.188634872436523, "step": 12940 }, { "epoch": 0.10453411686833544, "grad_norm": 0.8529443144798279, "learning_rate": 1.7921051993893525e-05, "loss": 3.5251079559326173, "step": 12950 }, { "epoch": 0.1046148381941025, "grad_norm": 0.7254199385643005, "learning_rate": 1.7919436524155313e-05, "loss": 3.7004085540771485, "step": 12960 }, { "epoch": 0.10469555951986956, "grad_norm": 0.6414648294448853, "learning_rate": 1.79178210544171e-05, "loss": 3.436268997192383, "step": 12970 }, { "epoch": 0.1047762808456366, "grad_norm": 2.044727325439453, "learning_rate": 1.7916205584678888e-05, "loss": 4.039547348022461, "step": 12980 }, { "epoch": 0.10485700217140366, "grad_norm": 0.8977207541465759, "learning_rate": 1.7914590114940672e-05, "loss": 3.3018688201904296, "step": 12990 }, { "epoch": 0.10493772349717072, "grad_norm": 1.1799033880233765, "learning_rate": 1.791297464520246e-05, "loss": 3.535986328125, "step": 13000 }, { "epoch": 0.10501844482293778, "grad_norm": 0.985294759273529, "learning_rate": 1.7911359175464248e-05, "loss": 3.4625362396240233, "step": 13010 }, { "epoch": 0.10509916614870482, "grad_norm": 1.4438199996948242, "learning_rate": 1.7909743705726035e-05, "loss": 3.124562644958496, "step": 13020 }, { "epoch": 0.10517988747447188, "grad_norm": 0.841156005859375, "learning_rate": 1.790812823598782e-05, "loss": 3.3056427001953126, "step": 13030 }, { "epoch": 0.10526060880023894, "grad_norm": 0.7937159538269043, "learning_rate": 1.7906512766249607e-05, "loss": 3.875289535522461, "step": 13040 }, { "epoch": 0.105341330126006, "grad_norm": 1.113740086555481, "learning_rate": 1.7904897296511395e-05, "loss": 3.469962310791016, "step": 13050 }, { "epoch": 0.10542205145177304, "grad_norm": 0.7262736558914185, "learning_rate": 1.7903281826773183e-05, "loss": 3.2161418914794924, "step": 13060 }, { "epoch": 0.1055027727775401, "grad_norm": 2.153160333633423, "learning_rate": 1.7901666357034967e-05, "loss": 3.6453784942626952, "step": 13070 }, { "epoch": 0.10558349410330715, "grad_norm": 0.7391391396522522, "learning_rate": 1.7900050887296755e-05, "loss": 3.3587371826171877, "step": 13080 }, { "epoch": 0.10566421542907421, "grad_norm": 0.9348188042640686, "learning_rate": 1.7898435417558542e-05, "loss": 3.2519412994384767, "step": 13090 }, { "epoch": 0.10574493675484126, "grad_norm": 0.9907675385475159, "learning_rate": 1.789681994782033e-05, "loss": 3.743299865722656, "step": 13100 }, { "epoch": 0.10582565808060831, "grad_norm": 1.4517370462417603, "learning_rate": 1.7895204478082114e-05, "loss": 3.453557586669922, "step": 13110 }, { "epoch": 0.10590637940637537, "grad_norm": 0.8187840580940247, "learning_rate": 1.7893589008343902e-05, "loss": 3.615683746337891, "step": 13120 }, { "epoch": 0.10598710073214243, "grad_norm": 0.6443862318992615, "learning_rate": 1.789197353860569e-05, "loss": 3.4772323608398437, "step": 13130 }, { "epoch": 0.10606782205790947, "grad_norm": 0.7945924997329712, "learning_rate": 1.7890358068867478e-05, "loss": 3.0394567489624023, "step": 13140 }, { "epoch": 0.10614854338367653, "grad_norm": 0.7662642598152161, "learning_rate": 1.7888742599129262e-05, "loss": 3.128749656677246, "step": 13150 }, { "epoch": 0.10622926470944359, "grad_norm": 0.9955745339393616, "learning_rate": 1.788712712939105e-05, "loss": 3.502706527709961, "step": 13160 }, { "epoch": 0.10630998603521065, "grad_norm": 0.8586907982826233, "learning_rate": 1.7885511659652837e-05, "loss": 3.722346878051758, "step": 13170 }, { "epoch": 0.1063907073609777, "grad_norm": 1.0401971340179443, "learning_rate": 1.7883896189914625e-05, "loss": 3.5021175384521483, "step": 13180 }, { "epoch": 0.10647142868674475, "grad_norm": 1.0917407274246216, "learning_rate": 1.788228072017641e-05, "loss": 3.334474563598633, "step": 13190 }, { "epoch": 0.10655215001251181, "grad_norm": 2.0347275733947754, "learning_rate": 1.7880665250438197e-05, "loss": 3.2851642608642577, "step": 13200 }, { "epoch": 0.10663287133827885, "grad_norm": 0.7850371599197388, "learning_rate": 1.7879049780699985e-05, "loss": 3.6186256408691406, "step": 13210 }, { "epoch": 0.10671359266404591, "grad_norm": 0.886070191860199, "learning_rate": 1.7877434310961772e-05, "loss": 3.472636413574219, "step": 13220 }, { "epoch": 0.10679431398981297, "grad_norm": 0.8310872316360474, "learning_rate": 1.7875818841223557e-05, "loss": 3.4933349609375, "step": 13230 }, { "epoch": 0.10687503531558003, "grad_norm": 2.1396725177764893, "learning_rate": 1.7874203371485344e-05, "loss": 3.67169303894043, "step": 13240 }, { "epoch": 0.10695575664134707, "grad_norm": 0.7645307779312134, "learning_rate": 1.7872587901747132e-05, "loss": 3.3931976318359376, "step": 13250 }, { "epoch": 0.10703647796711413, "grad_norm": 0.6402091383934021, "learning_rate": 1.787097243200892e-05, "loss": 3.1925912857055665, "step": 13260 }, { "epoch": 0.10711719929288119, "grad_norm": 1.2973113059997559, "learning_rate": 1.7869356962270704e-05, "loss": 3.690653610229492, "step": 13270 }, { "epoch": 0.10719792061864825, "grad_norm": 0.9348545670509338, "learning_rate": 1.7867741492532492e-05, "loss": 3.980866622924805, "step": 13280 }, { "epoch": 0.10727864194441529, "grad_norm": 1.4400935173034668, "learning_rate": 1.786612602279428e-05, "loss": 3.200180435180664, "step": 13290 }, { "epoch": 0.10735936327018235, "grad_norm": 1.004838466644287, "learning_rate": 1.7864510553056067e-05, "loss": 3.582081985473633, "step": 13300 }, { "epoch": 0.1074400845959494, "grad_norm": 1.1986199617385864, "learning_rate": 1.786289508331785e-05, "loss": 3.1094268798828124, "step": 13310 }, { "epoch": 0.10752080592171646, "grad_norm": 0.9703257083892822, "learning_rate": 1.786127961357964e-05, "loss": 3.471613311767578, "step": 13320 }, { "epoch": 0.10760152724748351, "grad_norm": 1.2462712526321411, "learning_rate": 1.7859664143841427e-05, "loss": 4.08927001953125, "step": 13330 }, { "epoch": 0.10768224857325057, "grad_norm": 1.2692896127700806, "learning_rate": 1.7858048674103214e-05, "loss": 3.518149185180664, "step": 13340 }, { "epoch": 0.10776296989901762, "grad_norm": 1.1130084991455078, "learning_rate": 1.7856433204365e-05, "loss": 3.532430648803711, "step": 13350 }, { "epoch": 0.10784369122478468, "grad_norm": 0.7671642303466797, "learning_rate": 1.7854817734626786e-05, "loss": 3.4243335723876953, "step": 13360 }, { "epoch": 0.10792441255055173, "grad_norm": 1.3672327995300293, "learning_rate": 1.7853202264888574e-05, "loss": 3.7583873748779295, "step": 13370 }, { "epoch": 0.10800513387631878, "grad_norm": 0.9976190328598022, "learning_rate": 1.7851586795150362e-05, "loss": 3.820630645751953, "step": 13380 }, { "epoch": 0.10808585520208584, "grad_norm": 0.9051738977432251, "learning_rate": 1.7849971325412146e-05, "loss": 3.3152481079101563, "step": 13390 }, { "epoch": 0.10816657652785289, "grad_norm": 1.1985307931900024, "learning_rate": 1.7848355855673937e-05, "loss": 3.3093193054199217, "step": 13400 }, { "epoch": 0.10824729785361994, "grad_norm": 0.8330503702163696, "learning_rate": 1.784674038593572e-05, "loss": 3.4064968109130858, "step": 13410 }, { "epoch": 0.108328019179387, "grad_norm": 1.197916030883789, "learning_rate": 1.784512491619751e-05, "loss": 3.5760345458984375, "step": 13420 }, { "epoch": 0.10840874050515406, "grad_norm": 1.4171432256698608, "learning_rate": 1.7843509446459294e-05, "loss": 3.5017189025878905, "step": 13430 }, { "epoch": 0.1084894618309211, "grad_norm": 1.2314223051071167, "learning_rate": 1.7841893976721085e-05, "loss": 3.2698436737060548, "step": 13440 }, { "epoch": 0.10857018315668816, "grad_norm": 0.8790445327758789, "learning_rate": 1.784027850698287e-05, "loss": 3.121916389465332, "step": 13450 }, { "epoch": 0.10865090448245522, "grad_norm": 0.9918376803398132, "learning_rate": 1.7838663037244657e-05, "loss": 2.91823673248291, "step": 13460 }, { "epoch": 0.10873162580822228, "grad_norm": 1.0302397012710571, "learning_rate": 1.783704756750644e-05, "loss": 3.483784484863281, "step": 13470 }, { "epoch": 0.10881234713398932, "grad_norm": 1.1194584369659424, "learning_rate": 1.7835432097768232e-05, "loss": 3.5929100036621096, "step": 13480 }, { "epoch": 0.10889306845975638, "grad_norm": 1.1960670948028564, "learning_rate": 1.7833816628030016e-05, "loss": 3.366714096069336, "step": 13490 }, { "epoch": 0.10897378978552344, "grad_norm": 0.93939608335495, "learning_rate": 1.7832201158291804e-05, "loss": 3.9307476043701173, "step": 13500 }, { "epoch": 0.1090545111112905, "grad_norm": 1.0834813117980957, "learning_rate": 1.783058568855359e-05, "loss": 3.323548126220703, "step": 13510 }, { "epoch": 0.10913523243705754, "grad_norm": 1.2841403484344482, "learning_rate": 1.782897021881538e-05, "loss": 3.6002613067626954, "step": 13520 }, { "epoch": 0.1092159537628246, "grad_norm": 0.73093181848526, "learning_rate": 1.7827354749077164e-05, "loss": 3.549667739868164, "step": 13530 }, { "epoch": 0.10929667508859166, "grad_norm": 1.0171347856521606, "learning_rate": 1.782573927933895e-05, "loss": 3.431717300415039, "step": 13540 }, { "epoch": 0.10937739641435872, "grad_norm": 0.7678283452987671, "learning_rate": 1.782412380960074e-05, "loss": 3.5576641082763674, "step": 13550 }, { "epoch": 0.10945811774012576, "grad_norm": 0.978786051273346, "learning_rate": 1.7822508339862527e-05, "loss": 3.474569320678711, "step": 13560 }, { "epoch": 0.10953883906589282, "grad_norm": 0.8531694412231445, "learning_rate": 1.782089287012431e-05, "loss": 4.417574691772461, "step": 13570 }, { "epoch": 0.10961956039165988, "grad_norm": 1.2831333875656128, "learning_rate": 1.78192774003861e-05, "loss": 3.0507492065429687, "step": 13580 }, { "epoch": 0.10970028171742692, "grad_norm": 1.0162214040756226, "learning_rate": 1.7817661930647886e-05, "loss": 3.3966953277587892, "step": 13590 }, { "epoch": 0.10978100304319398, "grad_norm": 1.454347848892212, "learning_rate": 1.7816046460909674e-05, "loss": 3.4729610443115235, "step": 13600 }, { "epoch": 0.10986172436896104, "grad_norm": 1.3452873229980469, "learning_rate": 1.781443099117146e-05, "loss": 3.6397098541259765, "step": 13610 }, { "epoch": 0.1099424456947281, "grad_norm": 1.4861350059509277, "learning_rate": 1.7812815521433246e-05, "loss": 3.2758407592773438, "step": 13620 }, { "epoch": 0.11002316702049514, "grad_norm": 1.069792628288269, "learning_rate": 1.7811200051695034e-05, "loss": 3.4814815521240234, "step": 13630 }, { "epoch": 0.1101038883462622, "grad_norm": 0.966259777545929, "learning_rate": 1.780958458195682e-05, "loss": 2.9620086669921877, "step": 13640 }, { "epoch": 0.11018460967202925, "grad_norm": 0.6918461322784424, "learning_rate": 1.7807969112218606e-05, "loss": 3.373994827270508, "step": 13650 }, { "epoch": 0.11026533099779631, "grad_norm": 1.6195155382156372, "learning_rate": 1.7806353642480394e-05, "loss": 3.712490463256836, "step": 13660 }, { "epoch": 0.11034605232356336, "grad_norm": 0.619921863079071, "learning_rate": 1.780473817274218e-05, "loss": 3.46351318359375, "step": 13670 }, { "epoch": 0.11042677364933041, "grad_norm": 1.1685112714767456, "learning_rate": 1.780312270300397e-05, "loss": 3.109944152832031, "step": 13680 }, { "epoch": 0.11050749497509747, "grad_norm": 1.1610980033874512, "learning_rate": 1.7801507233265753e-05, "loss": 3.730328369140625, "step": 13690 }, { "epoch": 0.11058821630086453, "grad_norm": 0.9358886480331421, "learning_rate": 1.779989176352754e-05, "loss": 3.157176208496094, "step": 13700 }, { "epoch": 0.11066893762663157, "grad_norm": 1.3239823579788208, "learning_rate": 1.779827629378933e-05, "loss": 4.062226486206055, "step": 13710 }, { "epoch": 0.11074965895239863, "grad_norm": 1.3253624439239502, "learning_rate": 1.7796660824051116e-05, "loss": 3.589504623413086, "step": 13720 }, { "epoch": 0.11083038027816569, "grad_norm": 1.5622692108154297, "learning_rate": 1.77950453543129e-05, "loss": 3.6729103088378907, "step": 13730 }, { "epoch": 0.11091110160393275, "grad_norm": 1.2471020221710205, "learning_rate": 1.7793429884574688e-05, "loss": 3.492734909057617, "step": 13740 }, { "epoch": 0.1109918229296998, "grad_norm": 0.9276240468025208, "learning_rate": 1.7791814414836476e-05, "loss": 3.7156742095947264, "step": 13750 }, { "epoch": 0.11107254425546685, "grad_norm": 0.65859055519104, "learning_rate": 1.7790198945098264e-05, "loss": 3.441787338256836, "step": 13760 }, { "epoch": 0.11115326558123391, "grad_norm": 0.8830935955047607, "learning_rate": 1.7788583475360048e-05, "loss": 3.8664051055908204, "step": 13770 }, { "epoch": 0.11123398690700097, "grad_norm": 1.002598524093628, "learning_rate": 1.7786968005621836e-05, "loss": 3.153526496887207, "step": 13780 }, { "epoch": 0.11131470823276801, "grad_norm": 1.2173675298690796, "learning_rate": 1.7785352535883623e-05, "loss": 3.4167221069335936, "step": 13790 }, { "epoch": 0.11139542955853507, "grad_norm": 0.9230808019638062, "learning_rate": 1.778373706614541e-05, "loss": 3.231935501098633, "step": 13800 }, { "epoch": 0.11147615088430213, "grad_norm": 1.9206197261810303, "learning_rate": 1.7782121596407195e-05, "loss": 3.335480499267578, "step": 13810 }, { "epoch": 0.11155687221006917, "grad_norm": 1.1265220642089844, "learning_rate": 1.7780506126668983e-05, "loss": 3.8159561157226562, "step": 13820 }, { "epoch": 0.11163759353583623, "grad_norm": 1.2671643495559692, "learning_rate": 1.777889065693077e-05, "loss": 3.3980560302734375, "step": 13830 }, { "epoch": 0.11171831486160329, "grad_norm": 1.2538107633590698, "learning_rate": 1.777727518719256e-05, "loss": 3.793490982055664, "step": 13840 }, { "epoch": 0.11179903618737035, "grad_norm": 1.1231571435928345, "learning_rate": 1.7775659717454343e-05, "loss": 4.407815933227539, "step": 13850 }, { "epoch": 0.11187975751313739, "grad_norm": 0.7516725659370422, "learning_rate": 1.777404424771613e-05, "loss": 3.7626651763916015, "step": 13860 }, { "epoch": 0.11196047883890445, "grad_norm": 1.5005881786346436, "learning_rate": 1.7772428777977918e-05, "loss": 4.007712936401367, "step": 13870 }, { "epoch": 0.1120412001646715, "grad_norm": 0.718030571937561, "learning_rate": 1.7770813308239706e-05, "loss": 3.559879684448242, "step": 13880 }, { "epoch": 0.11212192149043856, "grad_norm": 1.4404479265213013, "learning_rate": 1.776919783850149e-05, "loss": 4.1242317199707035, "step": 13890 }, { "epoch": 0.11220264281620561, "grad_norm": 0.8155930638313293, "learning_rate": 1.7767582368763278e-05, "loss": 3.9262836456298826, "step": 13900 }, { "epoch": 0.11228336414197267, "grad_norm": 1.3320869207382202, "learning_rate": 1.7765966899025066e-05, "loss": 3.265176010131836, "step": 13910 }, { "epoch": 0.11236408546773972, "grad_norm": 1.28706693649292, "learning_rate": 1.7764351429286853e-05, "loss": 2.9590875625610353, "step": 13920 }, { "epoch": 0.11244480679350678, "grad_norm": 1.0602996349334717, "learning_rate": 1.7762735959548638e-05, "loss": 3.493212890625, "step": 13930 }, { "epoch": 0.11252552811927383, "grad_norm": 1.0318547487258911, "learning_rate": 1.7761120489810425e-05, "loss": 3.4728721618652343, "step": 13940 }, { "epoch": 0.11260624944504088, "grad_norm": 0.9603176712989807, "learning_rate": 1.7759505020072213e-05, "loss": 3.5612945556640625, "step": 13950 }, { "epoch": 0.11268697077080794, "grad_norm": 1.1179713010787964, "learning_rate": 1.7757889550334e-05, "loss": 3.0216068267822265, "step": 13960 }, { "epoch": 0.112767692096575, "grad_norm": 1.1640952825546265, "learning_rate": 1.7756274080595785e-05, "loss": 3.784112548828125, "step": 13970 }, { "epoch": 0.11284841342234204, "grad_norm": 1.2002819776535034, "learning_rate": 1.7754658610857573e-05, "loss": 3.7075145721435545, "step": 13980 }, { "epoch": 0.1129291347481091, "grad_norm": 0.9102616906166077, "learning_rate": 1.775304314111936e-05, "loss": 3.1724676132202148, "step": 13990 }, { "epoch": 0.11300985607387616, "grad_norm": 1.002197504043579, "learning_rate": 1.7751427671381148e-05, "loss": 3.902700424194336, "step": 14000 }, { "epoch": 0.1130905773996432, "grad_norm": 0.6554461121559143, "learning_rate": 1.7749812201642936e-05, "loss": 3.420465850830078, "step": 14010 }, { "epoch": 0.11317129872541026, "grad_norm": 1.4944119453430176, "learning_rate": 1.774819673190472e-05, "loss": 3.6167327880859377, "step": 14020 }, { "epoch": 0.11325202005117732, "grad_norm": 0.6660603284835815, "learning_rate": 1.7746581262166508e-05, "loss": 3.2409557342529296, "step": 14030 }, { "epoch": 0.11333274137694438, "grad_norm": 1.128443956375122, "learning_rate": 1.7744965792428295e-05, "loss": 3.611113739013672, "step": 14040 }, { "epoch": 0.11341346270271142, "grad_norm": 0.9507812857627869, "learning_rate": 1.7743350322690083e-05, "loss": 3.5071224212646483, "step": 14050 }, { "epoch": 0.11349418402847848, "grad_norm": 1.366013526916504, "learning_rate": 1.7741734852951867e-05, "loss": 3.2373226165771483, "step": 14060 }, { "epoch": 0.11357490535424554, "grad_norm": 0.7672097682952881, "learning_rate": 1.7740119383213655e-05, "loss": 3.15423641204834, "step": 14070 }, { "epoch": 0.1136556266800126, "grad_norm": 1.6793930530548096, "learning_rate": 1.7738503913475443e-05, "loss": 3.0693504333496096, "step": 14080 }, { "epoch": 0.11373634800577964, "grad_norm": 1.0566350221633911, "learning_rate": 1.773688844373723e-05, "loss": 3.0924312591552736, "step": 14090 }, { "epoch": 0.1138170693315467, "grad_norm": 1.0064839124679565, "learning_rate": 1.7735272973999015e-05, "loss": 3.1981502532958985, "step": 14100 }, { "epoch": 0.11389779065731376, "grad_norm": 0.6750907897949219, "learning_rate": 1.7733657504260802e-05, "loss": 3.088506507873535, "step": 14110 }, { "epoch": 0.11397851198308082, "grad_norm": 1.2074052095413208, "learning_rate": 1.773204203452259e-05, "loss": 3.779794692993164, "step": 14120 }, { "epoch": 0.11405923330884786, "grad_norm": 1.3177810907363892, "learning_rate": 1.7730426564784378e-05, "loss": 3.2782833099365236, "step": 14130 }, { "epoch": 0.11413995463461492, "grad_norm": 1.2121280431747437, "learning_rate": 1.7728811095046162e-05, "loss": 3.0652576446533204, "step": 14140 }, { "epoch": 0.11422067596038198, "grad_norm": 1.4718828201293945, "learning_rate": 1.772719562530795e-05, "loss": 3.186142921447754, "step": 14150 }, { "epoch": 0.11430139728614903, "grad_norm": 1.2933496236801147, "learning_rate": 1.7725580155569738e-05, "loss": 3.5096736907958985, "step": 14160 }, { "epoch": 0.11438211861191608, "grad_norm": 1.0312010049819946, "learning_rate": 1.7723964685831525e-05, "loss": 3.349032974243164, "step": 14170 }, { "epoch": 0.11446283993768314, "grad_norm": 1.1315159797668457, "learning_rate": 1.772234921609331e-05, "loss": 3.182252883911133, "step": 14180 }, { "epoch": 0.1145435612634502, "grad_norm": 1.2997183799743652, "learning_rate": 1.7720733746355097e-05, "loss": 3.8316707611083984, "step": 14190 }, { "epoch": 0.11462428258921724, "grad_norm": 0.7709789276123047, "learning_rate": 1.7719118276616885e-05, "loss": 3.834306335449219, "step": 14200 }, { "epoch": 0.1147050039149843, "grad_norm": 0.8433240056037903, "learning_rate": 1.7717502806878673e-05, "loss": 3.7075557708740234, "step": 14210 }, { "epoch": 0.11478572524075135, "grad_norm": 1.2422330379486084, "learning_rate": 1.7715887337140457e-05, "loss": 3.4844638824462892, "step": 14220 }, { "epoch": 0.11486644656651841, "grad_norm": 1.9783390760421753, "learning_rate": 1.7714271867402245e-05, "loss": 3.6293609619140623, "step": 14230 }, { "epoch": 0.11494716789228546, "grad_norm": 0.6099735498428345, "learning_rate": 1.7712656397664032e-05, "loss": 3.4136051177978515, "step": 14240 }, { "epoch": 0.11502788921805251, "grad_norm": 1.396653413772583, "learning_rate": 1.771104092792582e-05, "loss": 3.79498176574707, "step": 14250 }, { "epoch": 0.11510861054381957, "grad_norm": 0.6828536987304688, "learning_rate": 1.7709425458187604e-05, "loss": 3.5153427124023438, "step": 14260 }, { "epoch": 0.11518933186958663, "grad_norm": 1.2250699996948242, "learning_rate": 1.7707809988449395e-05, "loss": 3.317091369628906, "step": 14270 }, { "epoch": 0.11527005319535368, "grad_norm": 0.909327507019043, "learning_rate": 1.770619451871118e-05, "loss": 3.4244735717773436, "step": 14280 }, { "epoch": 0.11535077452112073, "grad_norm": 1.0539733171463013, "learning_rate": 1.7704579048972967e-05, "loss": 3.6722583770751953, "step": 14290 }, { "epoch": 0.11543149584688779, "grad_norm": 1.6996948719024658, "learning_rate": 1.770296357923475e-05, "loss": 3.5778221130371093, "step": 14300 }, { "epoch": 0.11551221717265485, "grad_norm": 0.5544828772544861, "learning_rate": 1.7701348109496543e-05, "loss": 3.641347885131836, "step": 14310 }, { "epoch": 0.1155929384984219, "grad_norm": 1.3837637901306152, "learning_rate": 1.7699732639758327e-05, "loss": 3.4351581573486327, "step": 14320 }, { "epoch": 0.11567365982418895, "grad_norm": 1.474721908569336, "learning_rate": 1.7698117170020115e-05, "loss": 3.346834182739258, "step": 14330 }, { "epoch": 0.11575438114995601, "grad_norm": 1.1886237859725952, "learning_rate": 1.76965017002819e-05, "loss": 3.7039539337158205, "step": 14340 }, { "epoch": 0.11583510247572307, "grad_norm": 1.2763030529022217, "learning_rate": 1.769488623054369e-05, "loss": 3.300165557861328, "step": 14350 }, { "epoch": 0.11591582380149011, "grad_norm": 1.0062522888183594, "learning_rate": 1.7693270760805474e-05, "loss": 3.682977294921875, "step": 14360 }, { "epoch": 0.11599654512725717, "grad_norm": 1.369859218597412, "learning_rate": 1.7691655291067262e-05, "loss": 4.100421905517578, "step": 14370 }, { "epoch": 0.11607726645302423, "grad_norm": 1.739598274230957, "learning_rate": 1.7690039821329046e-05, "loss": 3.6940162658691404, "step": 14380 }, { "epoch": 0.11615798777879129, "grad_norm": 1.3064442873001099, "learning_rate": 1.7688424351590838e-05, "loss": 3.0852561950683595, "step": 14390 }, { "epoch": 0.11623870910455833, "grad_norm": 0.7765492796897888, "learning_rate": 1.7686808881852622e-05, "loss": 3.5983963012695312, "step": 14400 }, { "epoch": 0.11631943043032539, "grad_norm": 0.6035701632499695, "learning_rate": 1.768519341211441e-05, "loss": 3.7354782104492186, "step": 14410 }, { "epoch": 0.11640015175609245, "grad_norm": 1.3500909805297852, "learning_rate": 1.7683577942376197e-05, "loss": 3.4379833221435545, "step": 14420 }, { "epoch": 0.11648087308185949, "grad_norm": 1.098097801208496, "learning_rate": 1.7681962472637985e-05, "loss": 3.464509963989258, "step": 14430 }, { "epoch": 0.11656159440762655, "grad_norm": 1.1147584915161133, "learning_rate": 1.768034700289977e-05, "loss": 3.671160888671875, "step": 14440 }, { "epoch": 0.1166423157333936, "grad_norm": 1.1099202632904053, "learning_rate": 1.7678731533161557e-05, "loss": 3.6597969055175783, "step": 14450 }, { "epoch": 0.11672303705916066, "grad_norm": 1.1893411874771118, "learning_rate": 1.7677116063423345e-05, "loss": 3.2400062561035154, "step": 14460 }, { "epoch": 0.11680375838492771, "grad_norm": 1.5092859268188477, "learning_rate": 1.7675500593685132e-05, "loss": 3.9438079833984374, "step": 14470 }, { "epoch": 0.11688447971069477, "grad_norm": 0.9341883063316345, "learning_rate": 1.7673885123946917e-05, "loss": 3.409164047241211, "step": 14480 }, { "epoch": 0.11696520103646182, "grad_norm": 1.285333275794983, "learning_rate": 1.7672269654208704e-05, "loss": 3.6455242156982424, "step": 14490 }, { "epoch": 0.11704592236222888, "grad_norm": 1.5027525424957275, "learning_rate": 1.7670654184470492e-05, "loss": 3.6644481658935546, "step": 14500 }, { "epoch": 0.11712664368799593, "grad_norm": 1.2673778533935547, "learning_rate": 1.766903871473228e-05, "loss": 3.2050872802734376, "step": 14510 }, { "epoch": 0.11720736501376298, "grad_norm": 0.9449901580810547, "learning_rate": 1.7667423244994064e-05, "loss": 3.0835498809814452, "step": 14520 }, { "epoch": 0.11728808633953004, "grad_norm": 0.7455676794052124, "learning_rate": 1.766580777525585e-05, "loss": 3.6161571502685548, "step": 14530 }, { "epoch": 0.1173688076652971, "grad_norm": 1.1823776960372925, "learning_rate": 1.766419230551764e-05, "loss": 3.206714630126953, "step": 14540 }, { "epoch": 0.11744952899106414, "grad_norm": 1.1221345663070679, "learning_rate": 1.7662576835779427e-05, "loss": 3.567538833618164, "step": 14550 }, { "epoch": 0.1175302503168312, "grad_norm": 0.7220436334609985, "learning_rate": 1.766096136604121e-05, "loss": 3.836913299560547, "step": 14560 }, { "epoch": 0.11761097164259826, "grad_norm": 0.8840430974960327, "learning_rate": 1.7659345896303e-05, "loss": 3.3930892944335938, "step": 14570 }, { "epoch": 0.11769169296836532, "grad_norm": 1.2761290073394775, "learning_rate": 1.7657730426564787e-05, "loss": 3.2798343658447267, "step": 14580 }, { "epoch": 0.11777241429413236, "grad_norm": 1.2068129777908325, "learning_rate": 1.7656114956826574e-05, "loss": 3.5825103759765624, "step": 14590 }, { "epoch": 0.11785313561989942, "grad_norm": 1.2540608644485474, "learning_rate": 1.765449948708836e-05, "loss": 3.009800148010254, "step": 14600 }, { "epoch": 0.11793385694566648, "grad_norm": 1.2187066078186035, "learning_rate": 1.7652884017350146e-05, "loss": 3.5627464294433593, "step": 14610 }, { "epoch": 0.11801457827143352, "grad_norm": 0.7698653340339661, "learning_rate": 1.7651268547611934e-05, "loss": 3.450712203979492, "step": 14620 }, { "epoch": 0.11809529959720058, "grad_norm": 1.3056704998016357, "learning_rate": 1.7649653077873722e-05, "loss": 3.2978267669677734, "step": 14630 }, { "epoch": 0.11817602092296764, "grad_norm": 0.9176090359687805, "learning_rate": 1.7648037608135506e-05, "loss": 3.485057830810547, "step": 14640 }, { "epoch": 0.1182567422487347, "grad_norm": 1.0179173946380615, "learning_rate": 1.7646422138397294e-05, "loss": 3.2375938415527346, "step": 14650 }, { "epoch": 0.11833746357450174, "grad_norm": 0.6164001226425171, "learning_rate": 1.764480666865908e-05, "loss": 3.5425220489501954, "step": 14660 }, { "epoch": 0.1184181849002688, "grad_norm": 1.1737759113311768, "learning_rate": 1.764319119892087e-05, "loss": 4.066654968261719, "step": 14670 }, { "epoch": 0.11849890622603586, "grad_norm": 1.1373932361602783, "learning_rate": 1.7641575729182653e-05, "loss": 3.5303268432617188, "step": 14680 }, { "epoch": 0.11857962755180292, "grad_norm": 1.0102802515029907, "learning_rate": 1.763996025944444e-05, "loss": 3.2507965087890627, "step": 14690 }, { "epoch": 0.11866034887756996, "grad_norm": 0.6611341834068298, "learning_rate": 1.763834478970623e-05, "loss": 3.650251007080078, "step": 14700 }, { "epoch": 0.11874107020333702, "grad_norm": 0.651219367980957, "learning_rate": 1.7636729319968017e-05, "loss": 3.572590637207031, "step": 14710 }, { "epoch": 0.11882179152910408, "grad_norm": 1.0518853664398193, "learning_rate": 1.76351138502298e-05, "loss": 3.2243606567382814, "step": 14720 }, { "epoch": 0.11890251285487113, "grad_norm": 0.9603158235549927, "learning_rate": 1.763349838049159e-05, "loss": 3.574595260620117, "step": 14730 }, { "epoch": 0.11898323418063818, "grad_norm": 1.1603212356567383, "learning_rate": 1.7631882910753376e-05, "loss": 2.7873537063598635, "step": 14740 }, { "epoch": 0.11906395550640524, "grad_norm": 1.1585265398025513, "learning_rate": 1.7630267441015164e-05, "loss": 3.375334548950195, "step": 14750 }, { "epoch": 0.1191446768321723, "grad_norm": 1.471269965171814, "learning_rate": 1.7628651971276948e-05, "loss": 4.614651107788086, "step": 14760 }, { "epoch": 0.11922539815793935, "grad_norm": 1.5864958763122559, "learning_rate": 1.7627036501538736e-05, "loss": 4.018650817871094, "step": 14770 }, { "epoch": 0.1193061194837064, "grad_norm": 0.8767651319503784, "learning_rate": 1.7625421031800524e-05, "loss": 3.8400367736816405, "step": 14780 }, { "epoch": 0.11938684080947345, "grad_norm": 0.7152183055877686, "learning_rate": 1.762380556206231e-05, "loss": 3.480134963989258, "step": 14790 }, { "epoch": 0.11946756213524051, "grad_norm": 1.052699327468872, "learning_rate": 1.7622190092324096e-05, "loss": 3.2620513916015623, "step": 14800 }, { "epoch": 0.11954828346100756, "grad_norm": 0.6097494959831238, "learning_rate": 1.7620574622585883e-05, "loss": 3.618550491333008, "step": 14810 }, { "epoch": 0.11962900478677461, "grad_norm": 1.1119188070297241, "learning_rate": 1.761895915284767e-05, "loss": 3.201010894775391, "step": 14820 }, { "epoch": 0.11970972611254167, "grad_norm": 1.1298054456710815, "learning_rate": 1.761734368310946e-05, "loss": 3.5420665740966797, "step": 14830 }, { "epoch": 0.11979044743830873, "grad_norm": 0.702376663684845, "learning_rate": 1.7615728213371243e-05, "loss": 3.1964494705200197, "step": 14840 }, { "epoch": 0.11987116876407578, "grad_norm": 0.7833184599876404, "learning_rate": 1.761411274363303e-05, "loss": 3.563532257080078, "step": 14850 }, { "epoch": 0.11995189008984283, "grad_norm": 1.8774075508117676, "learning_rate": 1.761249727389482e-05, "loss": 3.119360160827637, "step": 14860 }, { "epoch": 0.12003261141560989, "grad_norm": 0.7680709362030029, "learning_rate": 1.7610881804156606e-05, "loss": 3.5077152252197266, "step": 14870 }, { "epoch": 0.12011333274137695, "grad_norm": 0.8492613434791565, "learning_rate": 1.760926633441839e-05, "loss": 3.5760597229003905, "step": 14880 }, { "epoch": 0.120194054067144, "grad_norm": 1.957531213760376, "learning_rate": 1.7607650864680178e-05, "loss": 3.5546051025390626, "step": 14890 }, { "epoch": 0.12027477539291105, "grad_norm": 1.1382701396942139, "learning_rate": 1.7606035394941966e-05, "loss": 3.185445213317871, "step": 14900 }, { "epoch": 0.12035549671867811, "grad_norm": 1.5587636232376099, "learning_rate": 1.7604419925203753e-05, "loss": 3.1607860565185546, "step": 14910 }, { "epoch": 0.12043621804444517, "grad_norm": 1.1658730506896973, "learning_rate": 1.7602804455465538e-05, "loss": 3.8714733123779297, "step": 14920 }, { "epoch": 0.12051693937021221, "grad_norm": 0.8251087665557861, "learning_rate": 1.7601188985727325e-05, "loss": 3.1784688949584963, "step": 14930 }, { "epoch": 0.12059766069597927, "grad_norm": 0.8795734643936157, "learning_rate": 1.7599573515989113e-05, "loss": 3.324964904785156, "step": 14940 }, { "epoch": 0.12067838202174633, "grad_norm": 0.976872444152832, "learning_rate": 1.75979580462509e-05, "loss": 3.4131328582763674, "step": 14950 }, { "epoch": 0.12075910334751339, "grad_norm": 1.0502489805221558, "learning_rate": 1.7596342576512685e-05, "loss": 3.3600879669189454, "step": 14960 }, { "epoch": 0.12083982467328043, "grad_norm": 0.9518268704414368, "learning_rate": 1.7594727106774473e-05, "loss": 2.9296358108520506, "step": 14970 }, { "epoch": 0.12092054599904749, "grad_norm": 0.6464422345161438, "learning_rate": 1.759311163703626e-05, "loss": 3.5932708740234376, "step": 14980 }, { "epoch": 0.12100126732481455, "grad_norm": 0.938061535358429, "learning_rate": 1.7591496167298048e-05, "loss": 3.1765392303466795, "step": 14990 }, { "epoch": 0.1210819886505816, "grad_norm": 0.7592253088951111, "learning_rate": 1.7589880697559833e-05, "loss": 3.3493473052978517, "step": 15000 }, { "epoch": 0.12116270997634865, "grad_norm": 1.3534802198410034, "learning_rate": 1.758826522782162e-05, "loss": 3.3361801147460937, "step": 15010 }, { "epoch": 0.1212434313021157, "grad_norm": 0.9923332333564758, "learning_rate": 1.7586649758083408e-05, "loss": 3.732892608642578, "step": 15020 }, { "epoch": 0.12132415262788276, "grad_norm": 1.3960225582122803, "learning_rate": 1.7585034288345196e-05, "loss": 3.3482902526855467, "step": 15030 }, { "epoch": 0.12140487395364981, "grad_norm": 1.2140017747879028, "learning_rate": 1.758341881860698e-05, "loss": 3.755510711669922, "step": 15040 }, { "epoch": 0.12148559527941687, "grad_norm": 2.09501051902771, "learning_rate": 1.7581803348868768e-05, "loss": 3.3518436431884764, "step": 15050 }, { "epoch": 0.12156631660518392, "grad_norm": 1.50418221950531, "learning_rate": 1.7580187879130555e-05, "loss": 3.2128170013427733, "step": 15060 }, { "epoch": 0.12164703793095098, "grad_norm": 1.0717315673828125, "learning_rate": 1.7578572409392343e-05, "loss": 2.9994916915893555, "step": 15070 }, { "epoch": 0.12172775925671803, "grad_norm": 0.952875018119812, "learning_rate": 1.7576956939654127e-05, "loss": 3.157169723510742, "step": 15080 }, { "epoch": 0.12180848058248508, "grad_norm": 0.6100026965141296, "learning_rate": 1.7575341469915915e-05, "loss": 3.783382034301758, "step": 15090 }, { "epoch": 0.12188920190825214, "grad_norm": 1.0136654376983643, "learning_rate": 1.7573726000177703e-05, "loss": 3.617051696777344, "step": 15100 }, { "epoch": 0.1219699232340192, "grad_norm": 1.3683826923370361, "learning_rate": 1.757211053043949e-05, "loss": 4.091848754882813, "step": 15110 }, { "epoch": 0.12205064455978624, "grad_norm": 1.0416009426116943, "learning_rate": 1.7570495060701275e-05, "loss": 3.2357131958007814, "step": 15120 }, { "epoch": 0.1221313658855533, "grad_norm": 0.9444455504417419, "learning_rate": 1.7568879590963062e-05, "loss": 3.4203125, "step": 15130 }, { "epoch": 0.12221208721132036, "grad_norm": 1.237135887145996, "learning_rate": 1.756726412122485e-05, "loss": 3.3767230987548826, "step": 15140 }, { "epoch": 0.12229280853708742, "grad_norm": 1.1348797082901, "learning_rate": 1.7565648651486638e-05, "loss": 3.482408905029297, "step": 15150 }, { "epoch": 0.12237352986285446, "grad_norm": 0.9799795746803284, "learning_rate": 1.7564033181748422e-05, "loss": 4.011640167236328, "step": 15160 }, { "epoch": 0.12245425118862152, "grad_norm": 0.9971061944961548, "learning_rate": 1.756241771201021e-05, "loss": 4.250944137573242, "step": 15170 }, { "epoch": 0.12253497251438858, "grad_norm": 1.3006311655044556, "learning_rate": 1.7560802242271997e-05, "loss": 3.1970613479614256, "step": 15180 }, { "epoch": 0.12261569384015564, "grad_norm": 1.5247001647949219, "learning_rate": 1.7559186772533785e-05, "loss": 3.58361930847168, "step": 15190 }, { "epoch": 0.12269641516592268, "grad_norm": 0.7086585760116577, "learning_rate": 1.755757130279557e-05, "loss": 3.3623092651367186, "step": 15200 }, { "epoch": 0.12277713649168974, "grad_norm": 1.3378357887268066, "learning_rate": 1.7555955833057357e-05, "loss": 3.4488365173339846, "step": 15210 }, { "epoch": 0.1228578578174568, "grad_norm": 3.001786947250366, "learning_rate": 1.7554340363319145e-05, "loss": 4.001404190063477, "step": 15220 }, { "epoch": 0.12293857914322384, "grad_norm": 0.9787221550941467, "learning_rate": 1.7552724893580933e-05, "loss": 3.341904067993164, "step": 15230 }, { "epoch": 0.1230193004689909, "grad_norm": 0.834627628326416, "learning_rate": 1.7551109423842717e-05, "loss": 3.6564666748046877, "step": 15240 }, { "epoch": 0.12310002179475796, "grad_norm": 0.7174093127250671, "learning_rate": 1.7549493954104505e-05, "loss": 2.9236581802368162, "step": 15250 }, { "epoch": 0.12318074312052502, "grad_norm": 1.432752013206482, "learning_rate": 1.7547878484366296e-05, "loss": 3.0642929077148438, "step": 15260 }, { "epoch": 0.12326146444629206, "grad_norm": 0.9490665197372437, "learning_rate": 1.754626301462808e-05, "loss": 3.5798870086669923, "step": 15270 }, { "epoch": 0.12334218577205912, "grad_norm": 0.8722389340400696, "learning_rate": 1.7544647544889868e-05, "loss": 3.5739791870117186, "step": 15280 }, { "epoch": 0.12342290709782618, "grad_norm": 0.8320950269699097, "learning_rate": 1.7543032075151655e-05, "loss": 3.6057819366455077, "step": 15290 }, { "epoch": 0.12350362842359323, "grad_norm": 0.57487952709198, "learning_rate": 1.7541416605413443e-05, "loss": 3.502931594848633, "step": 15300 }, { "epoch": 0.12358434974936028, "grad_norm": 0.8585942387580872, "learning_rate": 1.7539801135675227e-05, "loss": 3.3583465576171876, "step": 15310 }, { "epoch": 0.12366507107512734, "grad_norm": 0.9717778563499451, "learning_rate": 1.7538185665937015e-05, "loss": 3.256520462036133, "step": 15320 }, { "epoch": 0.1237457924008944, "grad_norm": 1.1257293224334717, "learning_rate": 1.7536570196198803e-05, "loss": 3.6846759796142576, "step": 15330 }, { "epoch": 0.12382651372666145, "grad_norm": 1.0099955797195435, "learning_rate": 1.753495472646059e-05, "loss": 2.9330224990844727, "step": 15340 }, { "epoch": 0.1239072350524285, "grad_norm": 0.85218346118927, "learning_rate": 1.7533339256722375e-05, "loss": 3.8501350402832033, "step": 15350 }, { "epoch": 0.12398795637819555, "grad_norm": 0.799975574016571, "learning_rate": 1.7531723786984162e-05, "loss": 3.225778579711914, "step": 15360 }, { "epoch": 0.12406867770396261, "grad_norm": 1.213304042816162, "learning_rate": 1.753010831724595e-05, "loss": 3.200006103515625, "step": 15370 }, { "epoch": 0.12414939902972967, "grad_norm": 1.7282142639160156, "learning_rate": 1.7528492847507738e-05, "loss": 3.607054901123047, "step": 15380 }, { "epoch": 0.12423012035549671, "grad_norm": 1.2047162055969238, "learning_rate": 1.7526877377769522e-05, "loss": 3.642271041870117, "step": 15390 }, { "epoch": 0.12431084168126377, "grad_norm": 0.9474931955337524, "learning_rate": 1.752526190803131e-05, "loss": 3.1243377685546876, "step": 15400 }, { "epoch": 0.12439156300703083, "grad_norm": 0.665981113910675, "learning_rate": 1.7523646438293097e-05, "loss": 3.501953125, "step": 15410 }, { "epoch": 0.12447228433279788, "grad_norm": 1.5783168077468872, "learning_rate": 1.7522030968554885e-05, "loss": 3.602911376953125, "step": 15420 }, { "epoch": 0.12455300565856493, "grad_norm": 1.8920432329177856, "learning_rate": 1.752041549881667e-05, "loss": 3.7723079681396485, "step": 15430 }, { "epoch": 0.12463372698433199, "grad_norm": 1.0136668682098389, "learning_rate": 1.7518800029078457e-05, "loss": 3.3051132202148437, "step": 15440 }, { "epoch": 0.12471444831009905, "grad_norm": 1.190430998802185, "learning_rate": 1.7517184559340245e-05, "loss": 3.004551887512207, "step": 15450 }, { "epoch": 0.1247951696358661, "grad_norm": 0.9989956021308899, "learning_rate": 1.7515569089602033e-05, "loss": 4.100632858276367, "step": 15460 }, { "epoch": 0.12487589096163315, "grad_norm": 1.0755558013916016, "learning_rate": 1.7513953619863817e-05, "loss": 3.139237976074219, "step": 15470 }, { "epoch": 0.12495661228740021, "grad_norm": 0.7135604619979858, "learning_rate": 1.7512338150125605e-05, "loss": 4.111958694458008, "step": 15480 }, { "epoch": 0.12503733361316727, "grad_norm": 1.3137218952178955, "learning_rate": 1.7510722680387392e-05, "loss": 3.6643272399902345, "step": 15490 }, { "epoch": 0.12511805493893433, "grad_norm": 0.8476197719573975, "learning_rate": 1.750910721064918e-05, "loss": 3.346664810180664, "step": 15500 }, { "epoch": 0.12519877626470138, "grad_norm": 0.9475691914558411, "learning_rate": 1.7507491740910964e-05, "loss": 3.4921630859375, "step": 15510 }, { "epoch": 0.1252794975904684, "grad_norm": 0.9388152360916138, "learning_rate": 1.7505876271172752e-05, "loss": 3.7042861938476563, "step": 15520 }, { "epoch": 0.12536021891623547, "grad_norm": 1.1121817827224731, "learning_rate": 1.750426080143454e-05, "loss": 3.346242904663086, "step": 15530 }, { "epoch": 0.12544094024200253, "grad_norm": 1.208888292312622, "learning_rate": 1.7502645331696327e-05, "loss": 3.2538028717041017, "step": 15540 }, { "epoch": 0.1255216615677696, "grad_norm": 0.9654739499092102, "learning_rate": 1.750102986195811e-05, "loss": 3.901014709472656, "step": 15550 }, { "epoch": 0.12560238289353665, "grad_norm": 1.3401739597320557, "learning_rate": 1.74994143922199e-05, "loss": 3.6444446563720705, "step": 15560 }, { "epoch": 0.1256831042193037, "grad_norm": 0.8195908069610596, "learning_rate": 1.7497798922481687e-05, "loss": 3.317433547973633, "step": 15570 }, { "epoch": 0.12576382554507076, "grad_norm": 0.5839756727218628, "learning_rate": 1.7496183452743475e-05, "loss": 3.422952651977539, "step": 15580 }, { "epoch": 0.12584454687083782, "grad_norm": 1.394698977470398, "learning_rate": 1.749456798300526e-05, "loss": 2.9915510177612306, "step": 15590 }, { "epoch": 0.12592526819660485, "grad_norm": 0.6826665997505188, "learning_rate": 1.7492952513267047e-05, "loss": 3.356245422363281, "step": 15600 }, { "epoch": 0.1260059895223719, "grad_norm": 0.9775680899620056, "learning_rate": 1.7491337043528834e-05, "loss": 3.287828063964844, "step": 15610 }, { "epoch": 0.12608671084813897, "grad_norm": 0.8209207057952881, "learning_rate": 1.7489721573790622e-05, "loss": 3.5329540252685545, "step": 15620 }, { "epoch": 0.12616743217390602, "grad_norm": 1.3742176294326782, "learning_rate": 1.7488106104052406e-05, "loss": 3.316793441772461, "step": 15630 }, { "epoch": 0.12624815349967308, "grad_norm": 1.0280025005340576, "learning_rate": 1.7486490634314194e-05, "loss": 3.5924346923828123, "step": 15640 }, { "epoch": 0.12632887482544014, "grad_norm": 2.1272242069244385, "learning_rate": 1.7484875164575982e-05, "loss": 3.8586990356445314, "step": 15650 }, { "epoch": 0.1264095961512072, "grad_norm": 1.0214121341705322, "learning_rate": 1.748325969483777e-05, "loss": 3.2209007263183596, "step": 15660 }, { "epoch": 0.12649031747697423, "grad_norm": 0.8045022487640381, "learning_rate": 1.7481644225099554e-05, "loss": 3.685464096069336, "step": 15670 }, { "epoch": 0.1265710388027413, "grad_norm": 1.3900450468063354, "learning_rate": 1.748002875536134e-05, "loss": 3.42626838684082, "step": 15680 }, { "epoch": 0.12665176012850834, "grad_norm": 1.0391923189163208, "learning_rate": 1.747841328562313e-05, "loss": 3.886464309692383, "step": 15690 }, { "epoch": 0.1267324814542754, "grad_norm": 1.2972145080566406, "learning_rate": 1.7476797815884917e-05, "loss": 3.0281723022460936, "step": 15700 }, { "epoch": 0.12681320278004246, "grad_norm": 0.8800085186958313, "learning_rate": 1.74751823461467e-05, "loss": 3.2694801330566405, "step": 15710 }, { "epoch": 0.12689392410580952, "grad_norm": 1.3327953815460205, "learning_rate": 1.747356687640849e-05, "loss": 3.4729766845703125, "step": 15720 }, { "epoch": 0.12697464543157658, "grad_norm": 1.4544984102249146, "learning_rate": 1.7471951406670277e-05, "loss": 3.517873001098633, "step": 15730 }, { "epoch": 0.12705536675734364, "grad_norm": 1.0196303129196167, "learning_rate": 1.7470335936932064e-05, "loss": 3.4474925994873047, "step": 15740 }, { "epoch": 0.12713608808311067, "grad_norm": 0.9471530318260193, "learning_rate": 1.746872046719385e-05, "loss": 3.187078666687012, "step": 15750 }, { "epoch": 0.12721680940887772, "grad_norm": 1.2284388542175293, "learning_rate": 1.7467104997455636e-05, "loss": 3.025089645385742, "step": 15760 }, { "epoch": 0.12729753073464478, "grad_norm": 1.2833571434020996, "learning_rate": 1.7465489527717424e-05, "loss": 3.792153549194336, "step": 15770 }, { "epoch": 0.12737825206041184, "grad_norm": 1.655132532119751, "learning_rate": 1.746387405797921e-05, "loss": 4.123207473754883, "step": 15780 }, { "epoch": 0.1274589733861789, "grad_norm": 0.7709908485412598, "learning_rate": 1.7462258588240996e-05, "loss": 3.519530487060547, "step": 15790 }, { "epoch": 0.12753969471194596, "grad_norm": 0.8249731659889221, "learning_rate": 1.7460643118502784e-05, "loss": 3.909021759033203, "step": 15800 }, { "epoch": 0.127620416037713, "grad_norm": 1.1060422658920288, "learning_rate": 1.745902764876457e-05, "loss": 3.251738739013672, "step": 15810 }, { "epoch": 0.12770113736348004, "grad_norm": 1.219903588294983, "learning_rate": 1.745741217902636e-05, "loss": 3.2779430389404296, "step": 15820 }, { "epoch": 0.1277818586892471, "grad_norm": 1.2428245544433594, "learning_rate": 1.7455796709288143e-05, "loss": 3.1248523712158205, "step": 15830 }, { "epoch": 0.12786258001501416, "grad_norm": 2.0827338695526123, "learning_rate": 1.745418123954993e-05, "loss": 3.737599182128906, "step": 15840 }, { "epoch": 0.12794330134078122, "grad_norm": 1.0330225229263306, "learning_rate": 1.745256576981172e-05, "loss": 3.323076629638672, "step": 15850 }, { "epoch": 0.12802402266654828, "grad_norm": 0.9186128377914429, "learning_rate": 1.7450950300073506e-05, "loss": 3.717391586303711, "step": 15860 }, { "epoch": 0.12810474399231533, "grad_norm": 1.3008354902267456, "learning_rate": 1.744933483033529e-05, "loss": 3.568034362792969, "step": 15870 }, { "epoch": 0.1281854653180824, "grad_norm": 0.6437421441078186, "learning_rate": 1.744771936059708e-05, "loss": 3.4813995361328125, "step": 15880 }, { "epoch": 0.12826618664384945, "grad_norm": 1.2183371782302856, "learning_rate": 1.7446103890858866e-05, "loss": 3.3950164794921873, "step": 15890 }, { "epoch": 0.12834690796961648, "grad_norm": 0.955516517162323, "learning_rate": 1.7444488421120654e-05, "loss": 3.62447509765625, "step": 15900 }, { "epoch": 0.12842762929538354, "grad_norm": 1.8702481985092163, "learning_rate": 1.7442872951382438e-05, "loss": 3.0603004455566407, "step": 15910 }, { "epoch": 0.1285083506211506, "grad_norm": 1.3304123878479004, "learning_rate": 1.7441257481644226e-05, "loss": 3.0777734756469726, "step": 15920 }, { "epoch": 0.12858907194691765, "grad_norm": 1.0430526733398438, "learning_rate": 1.7439642011906013e-05, "loss": 3.555886077880859, "step": 15930 }, { "epoch": 0.1286697932726847, "grad_norm": 0.7084943652153015, "learning_rate": 1.74380265421678e-05, "loss": 3.078866958618164, "step": 15940 }, { "epoch": 0.12875051459845177, "grad_norm": 0.7296786904335022, "learning_rate": 1.7436411072429585e-05, "loss": 3.3762371063232424, "step": 15950 }, { "epoch": 0.12883123592421883, "grad_norm": 1.168257236480713, "learning_rate": 1.7434795602691373e-05, "loss": 3.382948303222656, "step": 15960 }, { "epoch": 0.1289119572499859, "grad_norm": 1.33810555934906, "learning_rate": 1.743318013295316e-05, "loss": 4.134858703613281, "step": 15970 }, { "epoch": 0.12899267857575292, "grad_norm": 1.079829454421997, "learning_rate": 1.743156466321495e-05, "loss": 3.212234878540039, "step": 15980 }, { "epoch": 0.12907339990151998, "grad_norm": 0.4768875539302826, "learning_rate": 1.7429949193476733e-05, "loss": 3.1496746063232424, "step": 15990 }, { "epoch": 0.12915412122728703, "grad_norm": 1.1020983457565308, "learning_rate": 1.742833372373852e-05, "loss": 3.3929058074951173, "step": 16000 }, { "epoch": 0.1292348425530541, "grad_norm": 0.7477544546127319, "learning_rate": 1.7426718254000308e-05, "loss": 3.1916099548339845, "step": 16010 }, { "epoch": 0.12931556387882115, "grad_norm": 0.9643318057060242, "learning_rate": 1.7425102784262096e-05, "loss": 3.314693832397461, "step": 16020 }, { "epoch": 0.1293962852045882, "grad_norm": 1.7064672708511353, "learning_rate": 1.742348731452388e-05, "loss": 3.984284591674805, "step": 16030 }, { "epoch": 0.12947700653035527, "grad_norm": 0.703193724155426, "learning_rate": 1.7421871844785668e-05, "loss": 2.9929113388061523, "step": 16040 }, { "epoch": 0.1295577278561223, "grad_norm": 0.6593713164329529, "learning_rate": 1.7420256375047456e-05, "loss": 3.475080871582031, "step": 16050 }, { "epoch": 0.12963844918188935, "grad_norm": 1.0604122877120972, "learning_rate": 1.7418640905309243e-05, "loss": 3.383950042724609, "step": 16060 }, { "epoch": 0.1297191705076564, "grad_norm": 1.043336272239685, "learning_rate": 1.7417025435571028e-05, "loss": 3.6658885955810545, "step": 16070 }, { "epoch": 0.12979989183342347, "grad_norm": 0.6597950458526611, "learning_rate": 1.7415409965832815e-05, "loss": 3.008285713195801, "step": 16080 }, { "epoch": 0.12988061315919053, "grad_norm": 1.1127700805664062, "learning_rate": 1.7413794496094603e-05, "loss": 4.280392837524414, "step": 16090 }, { "epoch": 0.12996133448495759, "grad_norm": 1.1362652778625488, "learning_rate": 1.741217902635639e-05, "loss": 3.154010200500488, "step": 16100 }, { "epoch": 0.13004205581072464, "grad_norm": 0.7501316070556641, "learning_rate": 1.7410563556618175e-05, "loss": 3.640568161010742, "step": 16110 }, { "epoch": 0.1301227771364917, "grad_norm": 1.028833270072937, "learning_rate": 1.7408948086879963e-05, "loss": 3.0525245666503906, "step": 16120 }, { "epoch": 0.13020349846225873, "grad_norm": 0.8748075366020203, "learning_rate": 1.740733261714175e-05, "loss": 3.3450557708740236, "step": 16130 }, { "epoch": 0.1302842197880258, "grad_norm": 1.2176933288574219, "learning_rate": 1.7405717147403538e-05, "loss": 3.405997085571289, "step": 16140 }, { "epoch": 0.13036494111379285, "grad_norm": 0.8578622937202454, "learning_rate": 1.7404101677665322e-05, "loss": 2.871760368347168, "step": 16150 }, { "epoch": 0.1304456624395599, "grad_norm": 0.8497809767723083, "learning_rate": 1.7402486207927113e-05, "loss": 3.3664249420166015, "step": 16160 }, { "epoch": 0.13052638376532696, "grad_norm": 0.9023446440696716, "learning_rate": 1.7400870738188898e-05, "loss": 4.292705917358399, "step": 16170 }, { "epoch": 0.13060710509109402, "grad_norm": 0.891436755657196, "learning_rate": 1.7399255268450685e-05, "loss": 2.7959100723266603, "step": 16180 }, { "epoch": 0.13068782641686108, "grad_norm": 1.7653878927230835, "learning_rate": 1.739763979871247e-05, "loss": 3.984880065917969, "step": 16190 }, { "epoch": 0.13076854774262814, "grad_norm": 0.7861128449440002, "learning_rate": 1.739602432897426e-05, "loss": 3.1341012954711913, "step": 16200 }, { "epoch": 0.13084926906839517, "grad_norm": 0.5957671403884888, "learning_rate": 1.7394408859236045e-05, "loss": 3.4459346771240233, "step": 16210 }, { "epoch": 0.13092999039416223, "grad_norm": 0.7640578746795654, "learning_rate": 1.7392793389497833e-05, "loss": 3.237563705444336, "step": 16220 }, { "epoch": 0.13101071171992928, "grad_norm": 0.8834221959114075, "learning_rate": 1.7391177919759617e-05, "loss": 4.0465747833251955, "step": 16230 }, { "epoch": 0.13109143304569634, "grad_norm": 1.0577608346939087, "learning_rate": 1.7389562450021408e-05, "loss": 3.3757164001464846, "step": 16240 }, { "epoch": 0.1311721543714634, "grad_norm": 1.0549590587615967, "learning_rate": 1.7387946980283193e-05, "loss": 3.6463882446289064, "step": 16250 }, { "epoch": 0.13125287569723046, "grad_norm": 1.7131191492080688, "learning_rate": 1.738633151054498e-05, "loss": 3.526563262939453, "step": 16260 }, { "epoch": 0.13133359702299752, "grad_norm": 1.0004782676696777, "learning_rate": 1.7384716040806764e-05, "loss": 3.5785491943359373, "step": 16270 }, { "epoch": 0.13141431834876455, "grad_norm": 1.513611912727356, "learning_rate": 1.7383100571068556e-05, "loss": 4.112396621704102, "step": 16280 }, { "epoch": 0.1314950396745316, "grad_norm": 0.5477797389030457, "learning_rate": 1.738148510133034e-05, "loss": 3.8994117736816407, "step": 16290 }, { "epoch": 0.13157576100029866, "grad_norm": 1.0004903078079224, "learning_rate": 1.7379869631592128e-05, "loss": 3.7303417205810545, "step": 16300 }, { "epoch": 0.13165648232606572, "grad_norm": 1.206632137298584, "learning_rate": 1.7378254161853912e-05, "loss": 3.6936744689941405, "step": 16310 }, { "epoch": 0.13173720365183278, "grad_norm": 0.7834997773170471, "learning_rate": 1.7376638692115703e-05, "loss": 3.2605354309082033, "step": 16320 }, { "epoch": 0.13181792497759984, "grad_norm": 0.8229953646659851, "learning_rate": 1.7375023222377487e-05, "loss": 3.9006221771240233, "step": 16330 }, { "epoch": 0.1318986463033669, "grad_norm": 1.3128513097763062, "learning_rate": 1.7373407752639275e-05, "loss": 3.5010616302490236, "step": 16340 }, { "epoch": 0.13197936762913395, "grad_norm": 0.8564761281013489, "learning_rate": 1.7371792282901063e-05, "loss": 3.384562683105469, "step": 16350 }, { "epoch": 0.13206008895490098, "grad_norm": 0.7319250702857971, "learning_rate": 1.737017681316285e-05, "loss": 2.9690494537353516, "step": 16360 }, { "epoch": 0.13214081028066804, "grad_norm": 0.5869671106338501, "learning_rate": 1.7368561343424635e-05, "loss": 3.42886962890625, "step": 16370 }, { "epoch": 0.1322215316064351, "grad_norm": 1.9572803974151611, "learning_rate": 1.7366945873686422e-05, "loss": 3.257202911376953, "step": 16380 }, { "epoch": 0.13230225293220216, "grad_norm": 0.8956857919692993, "learning_rate": 1.736533040394821e-05, "loss": 3.605042266845703, "step": 16390 }, { "epoch": 0.13238297425796922, "grad_norm": 1.117706060409546, "learning_rate": 1.7363714934209998e-05, "loss": 3.328521728515625, "step": 16400 }, { "epoch": 0.13246369558373627, "grad_norm": 1.6020269393920898, "learning_rate": 1.7362099464471782e-05, "loss": 3.226362609863281, "step": 16410 }, { "epoch": 0.13254441690950333, "grad_norm": 2.3222436904907227, "learning_rate": 1.736048399473357e-05, "loss": 4.078148269653321, "step": 16420 }, { "epoch": 0.13262513823527036, "grad_norm": 1.1592903137207031, "learning_rate": 1.7358868524995357e-05, "loss": 3.1348407745361326, "step": 16430 }, { "epoch": 0.13270585956103742, "grad_norm": 2.7582664489746094, "learning_rate": 1.7357253055257145e-05, "loss": 3.1515529632568358, "step": 16440 }, { "epoch": 0.13278658088680448, "grad_norm": 0.6876855492591858, "learning_rate": 1.735563758551893e-05, "loss": 3.0838220596313475, "step": 16450 }, { "epoch": 0.13286730221257154, "grad_norm": 0.8536441326141357, "learning_rate": 1.7354022115780717e-05, "loss": 3.348849868774414, "step": 16460 }, { "epoch": 0.1329480235383386, "grad_norm": 0.9664126634597778, "learning_rate": 1.7352406646042505e-05, "loss": 3.3886787414550783, "step": 16470 }, { "epoch": 0.13302874486410565, "grad_norm": 1.116860270500183, "learning_rate": 1.7350791176304292e-05, "loss": 3.316868209838867, "step": 16480 }, { "epoch": 0.1331094661898727, "grad_norm": 1.3234769105911255, "learning_rate": 1.7349175706566077e-05, "loss": 3.3017574310302735, "step": 16490 }, { "epoch": 0.13319018751563977, "grad_norm": 1.5714890956878662, "learning_rate": 1.7347560236827864e-05, "loss": 3.811983108520508, "step": 16500 }, { "epoch": 0.1332709088414068, "grad_norm": 0.9049363136291504, "learning_rate": 1.7345944767089652e-05, "loss": 3.1860258102416994, "step": 16510 }, { "epoch": 0.13335163016717386, "grad_norm": 0.8014972805976868, "learning_rate": 1.734432929735144e-05, "loss": 3.2555980682373047, "step": 16520 }, { "epoch": 0.13343235149294091, "grad_norm": 0.8585716485977173, "learning_rate": 1.7342713827613228e-05, "loss": 3.4026458740234373, "step": 16530 }, { "epoch": 0.13351307281870797, "grad_norm": 0.8028514385223389, "learning_rate": 1.7341098357875012e-05, "loss": 3.2744983673095702, "step": 16540 }, { "epoch": 0.13359379414447503, "grad_norm": 1.037174940109253, "learning_rate": 1.73394828881368e-05, "loss": 3.4800113677978515, "step": 16550 }, { "epoch": 0.1336745154702421, "grad_norm": 1.016398549079895, "learning_rate": 1.7337867418398587e-05, "loss": 3.60850830078125, "step": 16560 }, { "epoch": 0.13375523679600915, "grad_norm": 1.8172415494918823, "learning_rate": 1.7336251948660375e-05, "loss": 3.302545928955078, "step": 16570 }, { "epoch": 0.1338359581217762, "grad_norm": 0.7216295599937439, "learning_rate": 1.733463647892216e-05, "loss": 3.435475540161133, "step": 16580 }, { "epoch": 0.13391667944754324, "grad_norm": 1.0163449048995972, "learning_rate": 1.7333021009183947e-05, "loss": 3.6092960357666017, "step": 16590 }, { "epoch": 0.1339974007733103, "grad_norm": 1.0598844289779663, "learning_rate": 1.7331405539445735e-05, "loss": 3.1272741317749024, "step": 16600 }, { "epoch": 0.13407812209907735, "grad_norm": 0.8008299469947815, "learning_rate": 1.7329790069707522e-05, "loss": 3.333908462524414, "step": 16610 }, { "epoch": 0.1341588434248444, "grad_norm": 0.7111622095108032, "learning_rate": 1.7328174599969307e-05, "loss": 2.9857067108154296, "step": 16620 }, { "epoch": 0.13423956475061147, "grad_norm": 0.643173098564148, "learning_rate": 1.7326559130231094e-05, "loss": 3.2036201477050783, "step": 16630 }, { "epoch": 0.13432028607637853, "grad_norm": 1.1782695055007935, "learning_rate": 1.7324943660492882e-05, "loss": 3.27041015625, "step": 16640 }, { "epoch": 0.13440100740214558, "grad_norm": 0.9698538780212402, "learning_rate": 1.732332819075467e-05, "loss": 3.4587730407714843, "step": 16650 }, { "epoch": 0.13448172872791261, "grad_norm": 0.7784515619277954, "learning_rate": 1.7321712721016454e-05, "loss": 3.255635070800781, "step": 16660 }, { "epoch": 0.13456245005367967, "grad_norm": 0.6493768692016602, "learning_rate": 1.7320097251278242e-05, "loss": 3.218474197387695, "step": 16670 }, { "epoch": 0.13464317137944673, "grad_norm": 0.7331141233444214, "learning_rate": 1.731848178154003e-05, "loss": 3.3622894287109375, "step": 16680 }, { "epoch": 0.1347238927052138, "grad_norm": 0.9891740083694458, "learning_rate": 1.7316866311801817e-05, "loss": 3.3133392333984375, "step": 16690 }, { "epoch": 0.13480461403098085, "grad_norm": 0.6519855856895447, "learning_rate": 1.73152508420636e-05, "loss": 3.171248435974121, "step": 16700 }, { "epoch": 0.1348853353567479, "grad_norm": 0.8091297149658203, "learning_rate": 1.731363537232539e-05, "loss": 3.468941879272461, "step": 16710 }, { "epoch": 0.13496605668251496, "grad_norm": 0.9928140044212341, "learning_rate": 1.7312019902587177e-05, "loss": 3.7597900390625, "step": 16720 }, { "epoch": 0.13504677800828202, "grad_norm": 1.1214970350265503, "learning_rate": 1.7310404432848964e-05, "loss": 4.051733779907226, "step": 16730 }, { "epoch": 0.13512749933404905, "grad_norm": 1.0060986280441284, "learning_rate": 1.730878896311075e-05, "loss": 3.187465476989746, "step": 16740 }, { "epoch": 0.1352082206598161, "grad_norm": 0.915601909160614, "learning_rate": 1.7307173493372536e-05, "loss": 3.615531158447266, "step": 16750 }, { "epoch": 0.13528894198558317, "grad_norm": 0.9216950535774231, "learning_rate": 1.7305558023634324e-05, "loss": 3.4464771270751955, "step": 16760 }, { "epoch": 0.13536966331135022, "grad_norm": 1.4442813396453857, "learning_rate": 1.7303942553896112e-05, "loss": 3.3223037719726562, "step": 16770 }, { "epoch": 0.13545038463711728, "grad_norm": 0.7502824664115906, "learning_rate": 1.7302327084157896e-05, "loss": 3.2546485900878905, "step": 16780 }, { "epoch": 0.13553110596288434, "grad_norm": 1.0631072521209717, "learning_rate": 1.7300711614419684e-05, "loss": 3.5367252349853517, "step": 16790 }, { "epoch": 0.1356118272886514, "grad_norm": 0.8256752490997314, "learning_rate": 1.729909614468147e-05, "loss": 3.4949474334716797, "step": 16800 }, { "epoch": 0.13569254861441846, "grad_norm": 1.2969512939453125, "learning_rate": 1.729748067494326e-05, "loss": 3.4133674621582033, "step": 16810 }, { "epoch": 0.1357732699401855, "grad_norm": 1.5346288681030273, "learning_rate": 1.7295865205205044e-05, "loss": 3.048838806152344, "step": 16820 }, { "epoch": 0.13585399126595255, "grad_norm": 1.0335098505020142, "learning_rate": 1.729424973546683e-05, "loss": 3.4893070220947267, "step": 16830 }, { "epoch": 0.1359347125917196, "grad_norm": 1.2595596313476562, "learning_rate": 1.729263426572862e-05, "loss": 3.233911895751953, "step": 16840 }, { "epoch": 0.13601543391748666, "grad_norm": 0.6326298713684082, "learning_rate": 1.7291018795990407e-05, "loss": 3.3116912841796875, "step": 16850 }, { "epoch": 0.13609615524325372, "grad_norm": 0.9991166591644287, "learning_rate": 1.728940332625219e-05, "loss": 3.3341625213623045, "step": 16860 }, { "epoch": 0.13617687656902078, "grad_norm": 1.1651116609573364, "learning_rate": 1.728778785651398e-05, "loss": 3.0894325256347654, "step": 16870 }, { "epoch": 0.13625759789478784, "grad_norm": 0.6160335540771484, "learning_rate": 1.7286172386775766e-05, "loss": 3.7402423858642577, "step": 16880 }, { "epoch": 0.13633831922055487, "grad_norm": 0.8390512466430664, "learning_rate": 1.7284556917037554e-05, "loss": 3.2485015869140623, "step": 16890 }, { "epoch": 0.13641904054632192, "grad_norm": 0.5221471190452576, "learning_rate": 1.728294144729934e-05, "loss": 3.9637451171875, "step": 16900 }, { "epoch": 0.13649976187208898, "grad_norm": 0.7327975630760193, "learning_rate": 1.7281325977561126e-05, "loss": 3.1685583114624025, "step": 16910 }, { "epoch": 0.13658048319785604, "grad_norm": 0.9505554437637329, "learning_rate": 1.7279710507822914e-05, "loss": 3.4083702087402346, "step": 16920 }, { "epoch": 0.1366612045236231, "grad_norm": 0.9857237935066223, "learning_rate": 1.72780950380847e-05, "loss": 3.1166446685791014, "step": 16930 }, { "epoch": 0.13674192584939016, "grad_norm": 0.7191462516784668, "learning_rate": 1.7276479568346486e-05, "loss": 3.6102481842041017, "step": 16940 }, { "epoch": 0.1368226471751572, "grad_norm": 0.6723006963729858, "learning_rate": 1.7274864098608273e-05, "loss": 3.5622417449951174, "step": 16950 }, { "epoch": 0.13690336850092427, "grad_norm": 1.2179467678070068, "learning_rate": 1.727324862887006e-05, "loss": 3.165636444091797, "step": 16960 }, { "epoch": 0.1369840898266913, "grad_norm": 1.41344153881073, "learning_rate": 1.727163315913185e-05, "loss": 3.4415767669677733, "step": 16970 }, { "epoch": 0.13706481115245836, "grad_norm": 0.5508615970611572, "learning_rate": 1.7270017689393633e-05, "loss": 3.296820068359375, "step": 16980 }, { "epoch": 0.13714553247822542, "grad_norm": 1.7018675804138184, "learning_rate": 1.726840221965542e-05, "loss": 3.6112255096435546, "step": 16990 }, { "epoch": 0.13722625380399248, "grad_norm": 0.8916847705841064, "learning_rate": 1.726678674991721e-05, "loss": 3.8670711517333984, "step": 17000 }, { "epoch": 0.13730697512975953, "grad_norm": 0.942166805267334, "learning_rate": 1.7265171280178996e-05, "loss": 2.755150032043457, "step": 17010 }, { "epoch": 0.1373876964555266, "grad_norm": 1.0701649188995361, "learning_rate": 1.726355581044078e-05, "loss": 3.671742630004883, "step": 17020 }, { "epoch": 0.13746841778129365, "grad_norm": 0.6808016896247864, "learning_rate": 1.726194034070257e-05, "loss": 3.5238063812255858, "step": 17030 }, { "epoch": 0.13754913910706068, "grad_norm": 0.7955338358879089, "learning_rate": 1.7260324870964356e-05, "loss": 3.4323684692382814, "step": 17040 }, { "epoch": 0.13762986043282774, "grad_norm": 1.0057145357131958, "learning_rate": 1.7258709401226144e-05, "loss": 3.3386932373046876, "step": 17050 }, { "epoch": 0.1377105817585948, "grad_norm": 0.6132277250289917, "learning_rate": 1.7257093931487928e-05, "loss": 3.3236625671386717, "step": 17060 }, { "epoch": 0.13779130308436185, "grad_norm": 0.9339909553527832, "learning_rate": 1.725547846174972e-05, "loss": 3.0451019287109373, "step": 17070 }, { "epoch": 0.1378720244101289, "grad_norm": 0.5102546215057373, "learning_rate": 1.7253862992011503e-05, "loss": 3.0340034484863283, "step": 17080 }, { "epoch": 0.13795274573589597, "grad_norm": 1.0550416707992554, "learning_rate": 1.725224752227329e-05, "loss": 2.898138999938965, "step": 17090 }, { "epoch": 0.13803346706166303, "grad_norm": 1.0575405359268188, "learning_rate": 1.7250632052535075e-05, "loss": 3.5024539947509767, "step": 17100 }, { "epoch": 0.1381141883874301, "grad_norm": 0.7931674122810364, "learning_rate": 1.7249016582796866e-05, "loss": 3.692422103881836, "step": 17110 }, { "epoch": 0.13819490971319712, "grad_norm": 0.5458567142486572, "learning_rate": 1.724740111305865e-05, "loss": 3.0790754318237306, "step": 17120 }, { "epoch": 0.13827563103896418, "grad_norm": 1.0688177347183228, "learning_rate": 1.724578564332044e-05, "loss": 4.469657135009766, "step": 17130 }, { "epoch": 0.13835635236473123, "grad_norm": 1.4206773042678833, "learning_rate": 1.7244170173582223e-05, "loss": 3.4245620727539063, "step": 17140 }, { "epoch": 0.1384370736904983, "grad_norm": 1.0988194942474365, "learning_rate": 1.7242554703844014e-05, "loss": 3.2016281127929687, "step": 17150 }, { "epoch": 0.13851779501626535, "grad_norm": 0.812824010848999, "learning_rate": 1.7240939234105798e-05, "loss": 3.2509212493896484, "step": 17160 }, { "epoch": 0.1385985163420324, "grad_norm": 1.0389528274536133, "learning_rate": 1.7239323764367586e-05, "loss": 3.831251525878906, "step": 17170 }, { "epoch": 0.13867923766779947, "grad_norm": 1.8710381984710693, "learning_rate": 1.723770829462937e-05, "loss": 3.326626205444336, "step": 17180 }, { "epoch": 0.13875995899356652, "grad_norm": 0.9021636843681335, "learning_rate": 1.723609282489116e-05, "loss": 3.053822898864746, "step": 17190 }, { "epoch": 0.13884068031933355, "grad_norm": 1.1005170345306396, "learning_rate": 1.7234477355152945e-05, "loss": 3.223126983642578, "step": 17200 }, { "epoch": 0.1389214016451006, "grad_norm": 1.5990281105041504, "learning_rate": 1.7232861885414733e-05, "loss": 3.478589630126953, "step": 17210 }, { "epoch": 0.13900212297086767, "grad_norm": 0.7513356804847717, "learning_rate": 1.723124641567652e-05, "loss": 3.0593034744262697, "step": 17220 }, { "epoch": 0.13908284429663473, "grad_norm": 0.9742470979690552, "learning_rate": 1.722963094593831e-05, "loss": 3.2989437103271486, "step": 17230 }, { "epoch": 0.13916356562240179, "grad_norm": 0.7941574454307556, "learning_rate": 1.7228015476200093e-05, "loss": 3.200331115722656, "step": 17240 }, { "epoch": 0.13924428694816884, "grad_norm": 0.7856708765029907, "learning_rate": 1.722640000646188e-05, "loss": 3.258082962036133, "step": 17250 }, { "epoch": 0.1393250082739359, "grad_norm": 1.0805068016052246, "learning_rate": 1.7224784536723668e-05, "loss": 3.4521636962890625, "step": 17260 }, { "epoch": 0.13940572959970293, "grad_norm": 1.6121957302093506, "learning_rate": 1.7223169066985456e-05, "loss": 3.3179229736328124, "step": 17270 }, { "epoch": 0.13948645092547, "grad_norm": 0.9368249773979187, "learning_rate": 1.722155359724724e-05, "loss": 2.993208122253418, "step": 17280 }, { "epoch": 0.13956717225123705, "grad_norm": 1.1670560836791992, "learning_rate": 1.7219938127509028e-05, "loss": 3.6760997772216797, "step": 17290 }, { "epoch": 0.1396478935770041, "grad_norm": 0.7430161237716675, "learning_rate": 1.7218322657770816e-05, "loss": 3.8441822052001955, "step": 17300 }, { "epoch": 0.13972861490277116, "grad_norm": 1.4301543235778809, "learning_rate": 1.7216707188032603e-05, "loss": 3.741892623901367, "step": 17310 }, { "epoch": 0.13980933622853822, "grad_norm": 1.5951004028320312, "learning_rate": 1.7215091718294388e-05, "loss": 3.123087501525879, "step": 17320 }, { "epoch": 0.13989005755430528, "grad_norm": 0.7345407605171204, "learning_rate": 1.7213476248556175e-05, "loss": 2.8094484329223635, "step": 17330 }, { "epoch": 0.13997077888007234, "grad_norm": 1.0813102722167969, "learning_rate": 1.7211860778817963e-05, "loss": 3.318076324462891, "step": 17340 }, { "epoch": 0.14005150020583937, "grad_norm": 1.0075801610946655, "learning_rate": 1.721024530907975e-05, "loss": 3.796051788330078, "step": 17350 }, { "epoch": 0.14013222153160643, "grad_norm": 1.0909007787704468, "learning_rate": 1.7208629839341535e-05, "loss": 3.5947383880615233, "step": 17360 }, { "epoch": 0.14021294285737348, "grad_norm": 1.7394299507141113, "learning_rate": 1.7207014369603323e-05, "loss": 2.815334701538086, "step": 17370 }, { "epoch": 0.14029366418314054, "grad_norm": 0.8806025981903076, "learning_rate": 1.720539889986511e-05, "loss": 2.8409984588623045, "step": 17380 }, { "epoch": 0.1403743855089076, "grad_norm": 1.3365167379379272, "learning_rate": 1.7203783430126898e-05, "loss": 3.397386169433594, "step": 17390 }, { "epoch": 0.14045510683467466, "grad_norm": 0.7442888021469116, "learning_rate": 1.7202167960388682e-05, "loss": 3.532529830932617, "step": 17400 }, { "epoch": 0.14053582816044172, "grad_norm": 1.0301486253738403, "learning_rate": 1.720055249065047e-05, "loss": 3.161005973815918, "step": 17410 }, { "epoch": 0.14061654948620878, "grad_norm": 0.9410560131072998, "learning_rate": 1.7198937020912258e-05, "loss": 3.2982513427734377, "step": 17420 }, { "epoch": 0.1406972708119758, "grad_norm": 0.6720465421676636, "learning_rate": 1.7197321551174045e-05, "loss": 3.3200027465820314, "step": 17430 }, { "epoch": 0.14077799213774286, "grad_norm": 1.6268028020858765, "learning_rate": 1.719570608143583e-05, "loss": 3.3720447540283205, "step": 17440 }, { "epoch": 0.14085871346350992, "grad_norm": 0.9959383606910706, "learning_rate": 1.7194090611697617e-05, "loss": 3.4954998016357424, "step": 17450 }, { "epoch": 0.14093943478927698, "grad_norm": 1.0105888843536377, "learning_rate": 1.7192475141959405e-05, "loss": 3.2610538482666014, "step": 17460 }, { "epoch": 0.14102015611504404, "grad_norm": 0.667651355266571, "learning_rate": 1.7190859672221193e-05, "loss": 4.012660980224609, "step": 17470 }, { "epoch": 0.1411008774408111, "grad_norm": 1.8546229600906372, "learning_rate": 1.7189244202482977e-05, "loss": 3.796558380126953, "step": 17480 }, { "epoch": 0.14118159876657815, "grad_norm": 0.9861103892326355, "learning_rate": 1.7187628732744765e-05, "loss": 3.7196376800537108, "step": 17490 }, { "epoch": 0.14126232009234518, "grad_norm": 1.0214765071868896, "learning_rate": 1.7186013263006552e-05, "loss": 3.551393508911133, "step": 17500 }, { "epoch": 0.14134304141811224, "grad_norm": 3.6012721061706543, "learning_rate": 1.718439779326834e-05, "loss": 3.3602279663085937, "step": 17510 }, { "epoch": 0.1414237627438793, "grad_norm": 0.9550672769546509, "learning_rate": 1.7182782323530124e-05, "loss": 3.0133615493774415, "step": 17520 }, { "epoch": 0.14150448406964636, "grad_norm": 0.7686058878898621, "learning_rate": 1.7181166853791912e-05, "loss": 3.57017822265625, "step": 17530 }, { "epoch": 0.14158520539541342, "grad_norm": 0.8919861912727356, "learning_rate": 1.71795513840537e-05, "loss": 3.595281219482422, "step": 17540 }, { "epoch": 0.14166592672118047, "grad_norm": 0.9369406700134277, "learning_rate": 1.7177935914315488e-05, "loss": 3.4834735870361326, "step": 17550 }, { "epoch": 0.14174664804694753, "grad_norm": 1.052040696144104, "learning_rate": 1.7176320444577272e-05, "loss": 3.307221603393555, "step": 17560 }, { "epoch": 0.1418273693727146, "grad_norm": 1.0619471073150635, "learning_rate": 1.717470497483906e-05, "loss": 3.768778610229492, "step": 17570 }, { "epoch": 0.14190809069848162, "grad_norm": 1.3670045137405396, "learning_rate": 1.7173089505100847e-05, "loss": 3.5196277618408205, "step": 17580 }, { "epoch": 0.14198881202424868, "grad_norm": 1.0049476623535156, "learning_rate": 1.7171474035362635e-05, "loss": 3.1561294555664063, "step": 17590 }, { "epoch": 0.14206953335001574, "grad_norm": 1.2029380798339844, "learning_rate": 1.716985856562442e-05, "loss": 3.4383968353271483, "step": 17600 }, { "epoch": 0.1421502546757828, "grad_norm": 1.2716094255447388, "learning_rate": 1.7168243095886207e-05, "loss": 4.06457633972168, "step": 17610 }, { "epoch": 0.14223097600154985, "grad_norm": 1.4376647472381592, "learning_rate": 1.7166627626147995e-05, "loss": 3.241751480102539, "step": 17620 }, { "epoch": 0.1423116973273169, "grad_norm": 0.9872801303863525, "learning_rate": 1.7165012156409782e-05, "loss": 3.456673431396484, "step": 17630 }, { "epoch": 0.14239241865308397, "grad_norm": 0.9611067175865173, "learning_rate": 1.7163396686671567e-05, "loss": 3.2260032653808595, "step": 17640 }, { "epoch": 0.142473139978851, "grad_norm": 0.975013017654419, "learning_rate": 1.7161781216933354e-05, "loss": 3.752197265625, "step": 17650 }, { "epoch": 0.14255386130461806, "grad_norm": 0.8080700635910034, "learning_rate": 1.7160165747195142e-05, "loss": 3.543062210083008, "step": 17660 }, { "epoch": 0.14263458263038511, "grad_norm": 0.6276020407676697, "learning_rate": 1.715855027745693e-05, "loss": 3.5811614990234375, "step": 17670 }, { "epoch": 0.14271530395615217, "grad_norm": 0.8907416462898254, "learning_rate": 1.7156934807718714e-05, "loss": 3.556740570068359, "step": 17680 }, { "epoch": 0.14279602528191923, "grad_norm": 0.9013380408287048, "learning_rate": 1.71553193379805e-05, "loss": 3.219870758056641, "step": 17690 }, { "epoch": 0.1428767466076863, "grad_norm": 1.1265003681182861, "learning_rate": 1.715370386824229e-05, "loss": 3.209759521484375, "step": 17700 }, { "epoch": 0.14295746793345335, "grad_norm": 0.8457514643669128, "learning_rate": 1.7152088398504077e-05, "loss": 3.4081661224365236, "step": 17710 }, { "epoch": 0.1430381892592204, "grad_norm": 0.6937522888183594, "learning_rate": 1.715047292876586e-05, "loss": 3.502592086791992, "step": 17720 }, { "epoch": 0.14311891058498744, "grad_norm": 1.0648244619369507, "learning_rate": 1.714885745902765e-05, "loss": 3.3561267852783203, "step": 17730 }, { "epoch": 0.1431996319107545, "grad_norm": 0.812670886516571, "learning_rate": 1.7147241989289437e-05, "loss": 3.7661441802978515, "step": 17740 }, { "epoch": 0.14328035323652155, "grad_norm": 1.449694275856018, "learning_rate": 1.7145626519551224e-05, "loss": 3.3686126708984374, "step": 17750 }, { "epoch": 0.1433610745622886, "grad_norm": 1.15534245967865, "learning_rate": 1.714401104981301e-05, "loss": 3.1810977935791014, "step": 17760 }, { "epoch": 0.14344179588805567, "grad_norm": 1.0873346328735352, "learning_rate": 1.7142395580074796e-05, "loss": 3.104094314575195, "step": 17770 }, { "epoch": 0.14352251721382273, "grad_norm": 1.2059046030044556, "learning_rate": 1.7140780110336584e-05, "loss": 3.051801300048828, "step": 17780 }, { "epoch": 0.14360323853958978, "grad_norm": 0.816059410572052, "learning_rate": 1.7139164640598372e-05, "loss": 2.9512248992919923, "step": 17790 }, { "epoch": 0.14368395986535684, "grad_norm": 1.133601188659668, "learning_rate": 1.713754917086016e-05, "loss": 3.446070098876953, "step": 17800 }, { "epoch": 0.14376468119112387, "grad_norm": 1.1714779138565063, "learning_rate": 1.7135933701121944e-05, "loss": 3.4707794189453125, "step": 17810 }, { "epoch": 0.14384540251689093, "grad_norm": 0.9734809994697571, "learning_rate": 1.713431823138373e-05, "loss": 3.2870513916015627, "step": 17820 }, { "epoch": 0.143926123842658, "grad_norm": 1.3846213817596436, "learning_rate": 1.713270276164552e-05, "loss": 2.928371620178223, "step": 17830 }, { "epoch": 0.14400684516842505, "grad_norm": 0.8643144965171814, "learning_rate": 1.7131087291907307e-05, "loss": 3.2431308746337892, "step": 17840 }, { "epoch": 0.1440875664941921, "grad_norm": 1.1970210075378418, "learning_rate": 1.712947182216909e-05, "loss": 3.272807312011719, "step": 17850 }, { "epoch": 0.14416828781995916, "grad_norm": 1.9151768684387207, "learning_rate": 1.712785635243088e-05, "loss": 3.4376659393310547, "step": 17860 }, { "epoch": 0.14424900914572622, "grad_norm": 1.0852289199829102, "learning_rate": 1.7126240882692667e-05, "loss": 3.3750350952148436, "step": 17870 }, { "epoch": 0.14432973047149325, "grad_norm": 0.7593417763710022, "learning_rate": 1.7124625412954454e-05, "loss": 3.1423078536987306, "step": 17880 }, { "epoch": 0.1444104517972603, "grad_norm": 0.8516937494277954, "learning_rate": 1.712300994321624e-05, "loss": 3.6234607696533203, "step": 17890 }, { "epoch": 0.14449117312302737, "grad_norm": 1.2176589965820312, "learning_rate": 1.7121394473478026e-05, "loss": 3.3890449523925783, "step": 17900 }, { "epoch": 0.14457189444879442, "grad_norm": 0.9772283434867859, "learning_rate": 1.7119779003739814e-05, "loss": 3.4091812133789063, "step": 17910 }, { "epoch": 0.14465261577456148, "grad_norm": 1.013677954673767, "learning_rate": 1.71181635340016e-05, "loss": 3.0642183303833006, "step": 17920 }, { "epoch": 0.14473333710032854, "grad_norm": 0.768185019493103, "learning_rate": 1.7116548064263386e-05, "loss": 3.3094551086425783, "step": 17930 }, { "epoch": 0.1448140584260956, "grad_norm": 0.6470479965209961, "learning_rate": 1.7114932594525177e-05, "loss": 3.103542518615723, "step": 17940 }, { "epoch": 0.14489477975186266, "grad_norm": 0.7313074469566345, "learning_rate": 1.711331712478696e-05, "loss": 3.4576114654541015, "step": 17950 }, { "epoch": 0.1449755010776297, "grad_norm": 0.9478697776794434, "learning_rate": 1.711170165504875e-05, "loss": 3.239948272705078, "step": 17960 }, { "epoch": 0.14505622240339675, "grad_norm": 0.7996122241020203, "learning_rate": 1.7110086185310533e-05, "loss": 2.9533502578735353, "step": 17970 }, { "epoch": 0.1451369437291638, "grad_norm": 0.7545565366744995, "learning_rate": 1.7108470715572324e-05, "loss": 3.0892122268676756, "step": 17980 }, { "epoch": 0.14521766505493086, "grad_norm": 0.6878143548965454, "learning_rate": 1.710685524583411e-05, "loss": 3.043931770324707, "step": 17990 }, { "epoch": 0.14529838638069792, "grad_norm": 0.5458289980888367, "learning_rate": 1.7105239776095896e-05, "loss": 3.650380325317383, "step": 18000 }, { "epoch": 0.14537910770646498, "grad_norm": 1.5906639099121094, "learning_rate": 1.710362430635768e-05, "loss": 3.507328414916992, "step": 18010 }, { "epoch": 0.14545982903223204, "grad_norm": 0.6264005899429321, "learning_rate": 1.7102008836619472e-05, "loss": 2.927234649658203, "step": 18020 }, { "epoch": 0.1455405503579991, "grad_norm": 0.6564739346504211, "learning_rate": 1.7100393366881256e-05, "loss": 3.169577217102051, "step": 18030 }, { "epoch": 0.14562127168376612, "grad_norm": 0.8181636333465576, "learning_rate": 1.7098777897143044e-05, "loss": 3.494423675537109, "step": 18040 }, { "epoch": 0.14570199300953318, "grad_norm": 0.975333034992218, "learning_rate": 1.7097162427404828e-05, "loss": 3.1086074829101564, "step": 18050 }, { "epoch": 0.14578271433530024, "grad_norm": 0.8132694363594055, "learning_rate": 1.709554695766662e-05, "loss": 3.3660552978515623, "step": 18060 }, { "epoch": 0.1458634356610673, "grad_norm": 1.007509708404541, "learning_rate": 1.7093931487928403e-05, "loss": 3.340306854248047, "step": 18070 }, { "epoch": 0.14594415698683436, "grad_norm": 2.3348684310913086, "learning_rate": 1.709231601819019e-05, "loss": 3.3905654907226563, "step": 18080 }, { "epoch": 0.1460248783126014, "grad_norm": 1.0601290464401245, "learning_rate": 1.709070054845198e-05, "loss": 2.9339385986328126, "step": 18090 }, { "epoch": 0.14610559963836847, "grad_norm": 1.1140170097351074, "learning_rate": 1.7089085078713767e-05, "loss": 3.3259819030761717, "step": 18100 }, { "epoch": 0.1461863209641355, "grad_norm": 1.2432788610458374, "learning_rate": 1.708746960897555e-05, "loss": 3.277363586425781, "step": 18110 }, { "epoch": 0.14626704228990256, "grad_norm": 1.1254594326019287, "learning_rate": 1.708585413923734e-05, "loss": 3.7672149658203127, "step": 18120 }, { "epoch": 0.14634776361566962, "grad_norm": 0.8577263951301575, "learning_rate": 1.7084238669499126e-05, "loss": 3.8933815002441405, "step": 18130 }, { "epoch": 0.14642848494143668, "grad_norm": 0.9741876125335693, "learning_rate": 1.7082623199760914e-05, "loss": 3.0580812454223634, "step": 18140 }, { "epoch": 0.14650920626720373, "grad_norm": 2.7284011840820312, "learning_rate": 1.7081007730022698e-05, "loss": 4.113860702514648, "step": 18150 }, { "epoch": 0.1465899275929708, "grad_norm": 0.8721016645431519, "learning_rate": 1.7079392260284486e-05, "loss": 3.124547004699707, "step": 18160 }, { "epoch": 0.14667064891873785, "grad_norm": 0.7726221680641174, "learning_rate": 1.7077776790546274e-05, "loss": 3.358318328857422, "step": 18170 }, { "epoch": 0.1467513702445049, "grad_norm": 0.9856062531471252, "learning_rate": 1.707616132080806e-05, "loss": 2.9855440139770506, "step": 18180 }, { "epoch": 0.14683209157027194, "grad_norm": 0.8670963048934937, "learning_rate": 1.7074545851069846e-05, "loss": 3.2319900512695314, "step": 18190 }, { "epoch": 0.146912812896039, "grad_norm": 0.9366911053657532, "learning_rate": 1.7072930381331633e-05, "loss": 2.857010269165039, "step": 18200 }, { "epoch": 0.14699353422180605, "grad_norm": 1.237051010131836, "learning_rate": 1.707131491159342e-05, "loss": 3.0560688018798827, "step": 18210 }, { "epoch": 0.1470742555475731, "grad_norm": 1.1910967826843262, "learning_rate": 1.706969944185521e-05, "loss": 2.8445226669311525, "step": 18220 }, { "epoch": 0.14715497687334017, "grad_norm": 0.7628244161605835, "learning_rate": 1.7068083972116993e-05, "loss": 3.598002624511719, "step": 18230 }, { "epoch": 0.14723569819910723, "grad_norm": 1.5376102924346924, "learning_rate": 1.706646850237878e-05, "loss": 2.94571533203125, "step": 18240 }, { "epoch": 0.1473164195248743, "grad_norm": 1.000174641609192, "learning_rate": 1.706485303264057e-05, "loss": 3.206293487548828, "step": 18250 }, { "epoch": 0.14739714085064132, "grad_norm": 0.5249760150909424, "learning_rate": 1.7063237562902356e-05, "loss": 3.3038612365722657, "step": 18260 }, { "epoch": 0.14747786217640838, "grad_norm": 1.2895550727844238, "learning_rate": 1.706162209316414e-05, "loss": 3.152578353881836, "step": 18270 }, { "epoch": 0.14755858350217543, "grad_norm": 0.8745874166488647, "learning_rate": 1.7060006623425928e-05, "loss": 3.217665100097656, "step": 18280 }, { "epoch": 0.1476393048279425, "grad_norm": 0.5952158570289612, "learning_rate": 1.7058391153687716e-05, "loss": 3.5161556243896483, "step": 18290 }, { "epoch": 0.14772002615370955, "grad_norm": 0.9896431565284729, "learning_rate": 1.7056775683949503e-05, "loss": 3.3890106201171877, "step": 18300 }, { "epoch": 0.1478007474794766, "grad_norm": 1.2834646701812744, "learning_rate": 1.7055160214211288e-05, "loss": 3.561334991455078, "step": 18310 }, { "epoch": 0.14788146880524367, "grad_norm": 1.1038576364517212, "learning_rate": 1.7053544744473075e-05, "loss": 2.9891679763793944, "step": 18320 }, { "epoch": 0.14796219013101072, "grad_norm": 0.9323963522911072, "learning_rate": 1.7051929274734863e-05, "loss": 3.302827072143555, "step": 18330 }, { "epoch": 0.14804291145677775, "grad_norm": 0.5978184938430786, "learning_rate": 1.705031380499665e-05, "loss": 3.358002471923828, "step": 18340 }, { "epoch": 0.1481236327825448, "grad_norm": 0.7787986993789673, "learning_rate": 1.7048698335258435e-05, "loss": 3.2502647399902345, "step": 18350 }, { "epoch": 0.14820435410831187, "grad_norm": 0.7821052074432373, "learning_rate": 1.7047082865520223e-05, "loss": 3.572040557861328, "step": 18360 }, { "epoch": 0.14828507543407893, "grad_norm": 1.5759283304214478, "learning_rate": 1.704546739578201e-05, "loss": 3.3673377990722657, "step": 18370 }, { "epoch": 0.14836579675984599, "grad_norm": 1.035396695137024, "learning_rate": 1.7043851926043798e-05, "loss": 3.0892953872680664, "step": 18380 }, { "epoch": 0.14844651808561304, "grad_norm": 0.8376635313034058, "learning_rate": 1.7042236456305583e-05, "loss": 3.5148502349853517, "step": 18390 }, { "epoch": 0.1485272394113801, "grad_norm": 1.3104310035705566, "learning_rate": 1.704062098656737e-05, "loss": 3.1130863189697267, "step": 18400 }, { "epoch": 0.14860796073714716, "grad_norm": 0.8240359425544739, "learning_rate": 1.7039005516829158e-05, "loss": 3.0992197036743163, "step": 18410 }, { "epoch": 0.1486886820629142, "grad_norm": 0.7633980512619019, "learning_rate": 1.7037390047090946e-05, "loss": 3.8378860473632814, "step": 18420 }, { "epoch": 0.14876940338868125, "grad_norm": 0.789003849029541, "learning_rate": 1.703577457735273e-05, "loss": 3.593994140625, "step": 18430 }, { "epoch": 0.1488501247144483, "grad_norm": 1.2914509773254395, "learning_rate": 1.7034159107614518e-05, "loss": 3.5166900634765623, "step": 18440 }, { "epoch": 0.14893084604021536, "grad_norm": 0.9571731090545654, "learning_rate": 1.7032543637876305e-05, "loss": 3.2955677032470705, "step": 18450 }, { "epoch": 0.14901156736598242, "grad_norm": 0.9713795185089111, "learning_rate": 1.7030928168138093e-05, "loss": 3.9989398956298827, "step": 18460 }, { "epoch": 0.14909228869174948, "grad_norm": 1.0115551948547363, "learning_rate": 1.7029312698399877e-05, "loss": 3.539319610595703, "step": 18470 }, { "epoch": 0.14917301001751654, "grad_norm": 1.085042953491211, "learning_rate": 1.7027697228661665e-05, "loss": 2.8564804077148436, "step": 18480 }, { "epoch": 0.14925373134328357, "grad_norm": 0.837103009223938, "learning_rate": 1.7026081758923453e-05, "loss": 3.4292064666748048, "step": 18490 }, { "epoch": 0.14933445266905063, "grad_norm": 0.7344841361045837, "learning_rate": 1.702446628918524e-05, "loss": 3.4945682525634765, "step": 18500 }, { "epoch": 0.14941517399481768, "grad_norm": 0.7529246211051941, "learning_rate": 1.7022850819447025e-05, "loss": 3.194240951538086, "step": 18510 }, { "epoch": 0.14949589532058474, "grad_norm": 2.132814884185791, "learning_rate": 1.7021235349708812e-05, "loss": 3.5291667938232423, "step": 18520 }, { "epoch": 0.1495766166463518, "grad_norm": 1.143469214439392, "learning_rate": 1.70196198799706e-05, "loss": 3.34546012878418, "step": 18530 }, { "epoch": 0.14965733797211886, "grad_norm": 1.7665033340454102, "learning_rate": 1.7018004410232388e-05, "loss": 3.6253002166748045, "step": 18540 }, { "epoch": 0.14973805929788592, "grad_norm": 0.7858599424362183, "learning_rate": 1.7016388940494172e-05, "loss": 3.0432754516601563, "step": 18550 }, { "epoch": 0.14981878062365298, "grad_norm": 1.012290358543396, "learning_rate": 1.701477347075596e-05, "loss": 3.4211170196533205, "step": 18560 }, { "epoch": 0.14989950194942, "grad_norm": 0.8722105026245117, "learning_rate": 1.7013158001017747e-05, "loss": 3.285787582397461, "step": 18570 }, { "epoch": 0.14998022327518706, "grad_norm": 1.2247079610824585, "learning_rate": 1.7011542531279535e-05, "loss": 3.2326854705810546, "step": 18580 }, { "epoch": 0.15006094460095412, "grad_norm": 1.1520583629608154, "learning_rate": 1.700992706154132e-05, "loss": 2.664145278930664, "step": 18590 }, { "epoch": 0.15014166592672118, "grad_norm": 1.1809428930282593, "learning_rate": 1.7008311591803107e-05, "loss": 3.6338611602783204, "step": 18600 }, { "epoch": 0.15022238725248824, "grad_norm": 0.9169169068336487, "learning_rate": 1.7006696122064895e-05, "loss": 3.019044876098633, "step": 18610 }, { "epoch": 0.1503031085782553, "grad_norm": 1.8206193447113037, "learning_rate": 1.7005080652326683e-05, "loss": 3.237209701538086, "step": 18620 }, { "epoch": 0.15038382990402235, "grad_norm": 0.9223924279212952, "learning_rate": 1.7003465182588467e-05, "loss": 3.3284591674804687, "step": 18630 }, { "epoch": 0.1504645512297894, "grad_norm": 1.179299235343933, "learning_rate": 1.7001849712850255e-05, "loss": 3.1721521377563477, "step": 18640 }, { "epoch": 0.15054527255555644, "grad_norm": 1.032592535018921, "learning_rate": 1.7000234243112042e-05, "loss": 2.7668066024780273, "step": 18650 }, { "epoch": 0.1506259938813235, "grad_norm": 0.9811490774154663, "learning_rate": 1.699861877337383e-05, "loss": 3.2763545989990233, "step": 18660 }, { "epoch": 0.15070671520709056, "grad_norm": 1.073499083518982, "learning_rate": 1.6997003303635614e-05, "loss": 3.554726791381836, "step": 18670 }, { "epoch": 0.15078743653285762, "grad_norm": 0.8436908721923828, "learning_rate": 1.6995387833897402e-05, "loss": 3.4319042205810546, "step": 18680 }, { "epoch": 0.15086815785862467, "grad_norm": 1.4828293323516846, "learning_rate": 1.699377236415919e-05, "loss": 3.451231002807617, "step": 18690 }, { "epoch": 0.15094887918439173, "grad_norm": 1.002579927444458, "learning_rate": 1.6992156894420977e-05, "loss": 3.5343761444091797, "step": 18700 }, { "epoch": 0.1510296005101588, "grad_norm": 1.2796415090560913, "learning_rate": 1.699054142468276e-05, "loss": 3.991099166870117, "step": 18710 }, { "epoch": 0.15111032183592582, "grad_norm": 0.9020527601242065, "learning_rate": 1.698892595494455e-05, "loss": 3.4146778106689455, "step": 18720 }, { "epoch": 0.15119104316169288, "grad_norm": 1.2834830284118652, "learning_rate": 1.6987310485206337e-05, "loss": 3.0806301116943358, "step": 18730 }, { "epoch": 0.15127176448745994, "grad_norm": 0.8517480492591858, "learning_rate": 1.6985695015468125e-05, "loss": 3.319265365600586, "step": 18740 }, { "epoch": 0.151352485813227, "grad_norm": 1.3134939670562744, "learning_rate": 1.698407954572991e-05, "loss": 3.448992919921875, "step": 18750 }, { "epoch": 0.15143320713899405, "grad_norm": 1.0474789142608643, "learning_rate": 1.6982464075991697e-05, "loss": 3.3925617218017576, "step": 18760 }, { "epoch": 0.1515139284647611, "grad_norm": 1.03651762008667, "learning_rate": 1.6980848606253484e-05, "loss": 3.3843925476074217, "step": 18770 }, { "epoch": 0.15159464979052817, "grad_norm": 0.8444377779960632, "learning_rate": 1.6979233136515272e-05, "loss": 3.234894943237305, "step": 18780 }, { "epoch": 0.15167537111629523, "grad_norm": 0.9702632427215576, "learning_rate": 1.6977617666777056e-05, "loss": 3.2885738372802735, "step": 18790 }, { "epoch": 0.15175609244206226, "grad_norm": 0.7610812187194824, "learning_rate": 1.6976002197038844e-05, "loss": 3.8193374633789063, "step": 18800 }, { "epoch": 0.15183681376782932, "grad_norm": 0.6416915059089661, "learning_rate": 1.6974386727300632e-05, "loss": 3.2271278381347654, "step": 18810 }, { "epoch": 0.15191753509359637, "grad_norm": 1.2935748100280762, "learning_rate": 1.697277125756242e-05, "loss": 2.861918258666992, "step": 18820 }, { "epoch": 0.15199825641936343, "grad_norm": 1.1487517356872559, "learning_rate": 1.6971155787824204e-05, "loss": 3.4807628631591796, "step": 18830 }, { "epoch": 0.1520789777451305, "grad_norm": 0.7401382327079773, "learning_rate": 1.696954031808599e-05, "loss": 3.148748779296875, "step": 18840 }, { "epoch": 0.15215969907089755, "grad_norm": 1.1385283470153809, "learning_rate": 1.696792484834778e-05, "loss": 3.22017822265625, "step": 18850 }, { "epoch": 0.1522404203966646, "grad_norm": 0.8034140467643738, "learning_rate": 1.6966309378609567e-05, "loss": 2.9955217361450197, "step": 18860 }, { "epoch": 0.15232114172243164, "grad_norm": 0.8178650140762329, "learning_rate": 1.696469390887135e-05, "loss": 3.22717170715332, "step": 18870 }, { "epoch": 0.1524018630481987, "grad_norm": 1.1321005821228027, "learning_rate": 1.696307843913314e-05, "loss": 3.354756546020508, "step": 18880 }, { "epoch": 0.15248258437396575, "grad_norm": 0.8777665495872498, "learning_rate": 1.6961462969394927e-05, "loss": 3.9740604400634765, "step": 18890 }, { "epoch": 0.1525633056997328, "grad_norm": 1.1958885192871094, "learning_rate": 1.6959847499656714e-05, "loss": 3.165296936035156, "step": 18900 }, { "epoch": 0.15264402702549987, "grad_norm": 1.0935535430908203, "learning_rate": 1.69582320299185e-05, "loss": 3.1327959060668946, "step": 18910 }, { "epoch": 0.15272474835126693, "grad_norm": 1.8701854944229126, "learning_rate": 1.6956616560180286e-05, "loss": 3.065797805786133, "step": 18920 }, { "epoch": 0.15280546967703398, "grad_norm": 0.9924836754798889, "learning_rate": 1.6955001090442074e-05, "loss": 3.245972442626953, "step": 18930 }, { "epoch": 0.15288619100280104, "grad_norm": 1.0924723148345947, "learning_rate": 1.695338562070386e-05, "loss": 3.0171279907226562, "step": 18940 }, { "epoch": 0.15296691232856807, "grad_norm": 0.7023959159851074, "learning_rate": 1.6951770150965646e-05, "loss": 2.9879314422607424, "step": 18950 }, { "epoch": 0.15304763365433513, "grad_norm": 0.9164302945137024, "learning_rate": 1.6950154681227437e-05, "loss": 3.6485042572021484, "step": 18960 }, { "epoch": 0.1531283549801022, "grad_norm": 0.8581939339637756, "learning_rate": 1.694853921148922e-05, "loss": 3.5484813690185546, "step": 18970 }, { "epoch": 0.15320907630586925, "grad_norm": 0.9812690615653992, "learning_rate": 1.694692374175101e-05, "loss": 3.3634658813476563, "step": 18980 }, { "epoch": 0.1532897976316363, "grad_norm": 0.9891710877418518, "learning_rate": 1.6945308272012793e-05, "loss": 3.486370086669922, "step": 18990 }, { "epoch": 0.15337051895740336, "grad_norm": 0.7663323283195496, "learning_rate": 1.6943692802274584e-05, "loss": 3.2187374114990233, "step": 19000 }, { "epoch": 0.15345124028317042, "grad_norm": 1.6997822523117065, "learning_rate": 1.694207733253637e-05, "loss": 3.3905868530273438, "step": 19010 }, { "epoch": 0.15353196160893748, "grad_norm": 1.1006027460098267, "learning_rate": 1.6940461862798156e-05, "loss": 3.60827751159668, "step": 19020 }, { "epoch": 0.1536126829347045, "grad_norm": 1.412122130393982, "learning_rate": 1.6938846393059944e-05, "loss": 3.7203372955322265, "step": 19030 }, { "epoch": 0.15369340426047157, "grad_norm": 0.9695286750793457, "learning_rate": 1.6937230923321732e-05, "loss": 3.5201438903808593, "step": 19040 }, { "epoch": 0.15377412558623862, "grad_norm": 0.6562961935997009, "learning_rate": 1.693561545358352e-05, "loss": 3.573560333251953, "step": 19050 }, { "epoch": 0.15385484691200568, "grad_norm": 1.2544748783111572, "learning_rate": 1.6933999983845304e-05, "loss": 3.3962657928466795, "step": 19060 }, { "epoch": 0.15393556823777274, "grad_norm": 0.742588222026825, "learning_rate": 1.693238451410709e-05, "loss": 3.6160579681396485, "step": 19070 }, { "epoch": 0.1540162895635398, "grad_norm": 0.7848719358444214, "learning_rate": 1.693076904436888e-05, "loss": 3.662109375, "step": 19080 }, { "epoch": 0.15409701088930686, "grad_norm": 0.6143650412559509, "learning_rate": 1.6929153574630667e-05, "loss": 3.1514165878295897, "step": 19090 }, { "epoch": 0.1541777322150739, "grad_norm": 1.3743902444839478, "learning_rate": 1.692753810489245e-05, "loss": 3.0028940200805665, "step": 19100 }, { "epoch": 0.15425845354084095, "grad_norm": 1.3288723230361938, "learning_rate": 1.692592263515424e-05, "loss": 3.186778450012207, "step": 19110 }, { "epoch": 0.154339174866608, "grad_norm": 0.8513529896736145, "learning_rate": 1.6924307165416027e-05, "loss": 2.9498172760009767, "step": 19120 }, { "epoch": 0.15441989619237506, "grad_norm": 0.6633292436599731, "learning_rate": 1.6922691695677814e-05, "loss": 3.3058197021484377, "step": 19130 }, { "epoch": 0.15450061751814212, "grad_norm": 5.990630149841309, "learning_rate": 1.69210762259396e-05, "loss": 3.55377197265625, "step": 19140 }, { "epoch": 0.15458133884390918, "grad_norm": 0.6903915405273438, "learning_rate": 1.6919460756201386e-05, "loss": 3.5558246612548827, "step": 19150 }, { "epoch": 0.15466206016967624, "grad_norm": 1.1724952459335327, "learning_rate": 1.6917845286463174e-05, "loss": 2.9632646560668947, "step": 19160 }, { "epoch": 0.1547427814954433, "grad_norm": 2.311767816543579, "learning_rate": 1.691622981672496e-05, "loss": 3.075531005859375, "step": 19170 }, { "epoch": 0.15482350282121032, "grad_norm": 0.8855867385864258, "learning_rate": 1.6914614346986746e-05, "loss": 3.196503829956055, "step": 19180 }, { "epoch": 0.15490422414697738, "grad_norm": 1.2510308027267456, "learning_rate": 1.6912998877248534e-05, "loss": 3.242275619506836, "step": 19190 }, { "epoch": 0.15498494547274444, "grad_norm": 1.533078670501709, "learning_rate": 1.691138340751032e-05, "loss": 3.435502624511719, "step": 19200 }, { "epoch": 0.1550656667985115, "grad_norm": 1.4944254159927368, "learning_rate": 1.690976793777211e-05, "loss": 3.654922866821289, "step": 19210 }, { "epoch": 0.15514638812427856, "grad_norm": 1.119842290878296, "learning_rate": 1.6908152468033893e-05, "loss": 2.957832908630371, "step": 19220 }, { "epoch": 0.15522710945004561, "grad_norm": 0.6764616370201111, "learning_rate": 1.690653699829568e-05, "loss": 3.1843181610107423, "step": 19230 }, { "epoch": 0.15530783077581267, "grad_norm": 1.1968938112258911, "learning_rate": 1.690492152855747e-05, "loss": 2.9509441375732424, "step": 19240 }, { "epoch": 0.15538855210157973, "grad_norm": 1.317968726158142, "learning_rate": 1.6903306058819256e-05, "loss": 3.4343193054199217, "step": 19250 }, { "epoch": 0.15546927342734676, "grad_norm": 1.2973823547363281, "learning_rate": 1.690169058908104e-05, "loss": 3.200858306884766, "step": 19260 }, { "epoch": 0.15554999475311382, "grad_norm": 0.9907042980194092, "learning_rate": 1.690007511934283e-05, "loss": 3.26114501953125, "step": 19270 }, { "epoch": 0.15563071607888088, "grad_norm": 1.7992020845413208, "learning_rate": 1.6898459649604616e-05, "loss": 3.4090782165527345, "step": 19280 }, { "epoch": 0.15571143740464793, "grad_norm": 1.0974493026733398, "learning_rate": 1.6896844179866404e-05, "loss": 3.4466873168945313, "step": 19290 }, { "epoch": 0.155792158730415, "grad_norm": 0.509930431842804, "learning_rate": 1.6895228710128188e-05, "loss": 3.6425701141357423, "step": 19300 }, { "epoch": 0.15587288005618205, "grad_norm": 0.5984403491020203, "learning_rate": 1.6893613240389976e-05, "loss": 3.951824951171875, "step": 19310 }, { "epoch": 0.1559536013819491, "grad_norm": 0.7848033905029297, "learning_rate": 1.6891997770651763e-05, "loss": 3.0436153411865234, "step": 19320 }, { "epoch": 0.15603432270771614, "grad_norm": 0.778779923915863, "learning_rate": 1.689038230091355e-05, "loss": 2.9774717330932616, "step": 19330 }, { "epoch": 0.1561150440334832, "grad_norm": 1.447962760925293, "learning_rate": 1.6888766831175335e-05, "loss": 3.2205921173095704, "step": 19340 }, { "epoch": 0.15619576535925025, "grad_norm": 1.119388461112976, "learning_rate": 1.6887151361437123e-05, "loss": 3.32129020690918, "step": 19350 }, { "epoch": 0.1562764866850173, "grad_norm": 0.5505067706108093, "learning_rate": 1.688553589169891e-05, "loss": 3.1353343963623046, "step": 19360 }, { "epoch": 0.15635720801078437, "grad_norm": 1.0682811737060547, "learning_rate": 1.68839204219607e-05, "loss": 3.437662124633789, "step": 19370 }, { "epoch": 0.15643792933655143, "grad_norm": 1.8510124683380127, "learning_rate": 1.6882304952222483e-05, "loss": 3.003469467163086, "step": 19380 }, { "epoch": 0.1565186506623185, "grad_norm": 1.3350367546081543, "learning_rate": 1.688068948248427e-05, "loss": 3.3238624572753905, "step": 19390 }, { "epoch": 0.15659937198808555, "grad_norm": 1.2174171209335327, "learning_rate": 1.6879074012746058e-05, "loss": 3.0048261642456056, "step": 19400 }, { "epoch": 0.15668009331385258, "grad_norm": 1.1859607696533203, "learning_rate": 1.6877458543007846e-05, "loss": 3.6974723815917967, "step": 19410 }, { "epoch": 0.15676081463961963, "grad_norm": 1.9490084648132324, "learning_rate": 1.687584307326963e-05, "loss": 3.6710365295410154, "step": 19420 }, { "epoch": 0.1568415359653867, "grad_norm": 1.2197284698486328, "learning_rate": 1.6874227603531418e-05, "loss": 3.330828094482422, "step": 19430 }, { "epoch": 0.15692225729115375, "grad_norm": 0.6042201519012451, "learning_rate": 1.6872612133793206e-05, "loss": 3.3463886260986326, "step": 19440 }, { "epoch": 0.1570029786169208, "grad_norm": 0.8697660565376282, "learning_rate": 1.6870996664054993e-05, "loss": 3.0840709686279295, "step": 19450 }, { "epoch": 0.15708369994268787, "grad_norm": 1.0381048917770386, "learning_rate": 1.6869381194316778e-05, "loss": 3.146281623840332, "step": 19460 }, { "epoch": 0.15716442126845492, "grad_norm": 0.6657821536064148, "learning_rate": 1.6867765724578565e-05, "loss": 3.2882884979248046, "step": 19470 }, { "epoch": 0.15724514259422195, "grad_norm": 1.2090884447097778, "learning_rate": 1.6866150254840353e-05, "loss": 3.6582687377929686, "step": 19480 }, { "epoch": 0.157325863919989, "grad_norm": 1.1902879476547241, "learning_rate": 1.686453478510214e-05, "loss": 3.1528234481811523, "step": 19490 }, { "epoch": 0.15740658524575607, "grad_norm": 0.8024628758430481, "learning_rate": 1.6862919315363925e-05, "loss": 2.9244462966918947, "step": 19500 }, { "epoch": 0.15748730657152313, "grad_norm": 1.2325942516326904, "learning_rate": 1.6861303845625713e-05, "loss": 3.43609504699707, "step": 19510 }, { "epoch": 0.15756802789729019, "grad_norm": 0.6969584822654724, "learning_rate": 1.68596883758875e-05, "loss": 3.7574222564697264, "step": 19520 }, { "epoch": 0.15764874922305724, "grad_norm": 1.0515141487121582, "learning_rate": 1.6858072906149288e-05, "loss": 3.7408145904541015, "step": 19530 }, { "epoch": 0.1577294705488243, "grad_norm": 1.5727159976959229, "learning_rate": 1.6856457436411072e-05, "loss": 3.1255088806152345, "step": 19540 }, { "epoch": 0.15781019187459136, "grad_norm": 1.1490434408187866, "learning_rate": 1.685484196667286e-05, "loss": 3.2316402435302733, "step": 19550 }, { "epoch": 0.1578909132003584, "grad_norm": 1.2691773176193237, "learning_rate": 1.6853226496934648e-05, "loss": 3.2739883422851563, "step": 19560 }, { "epoch": 0.15797163452612545, "grad_norm": 1.1244312524795532, "learning_rate": 1.6851611027196435e-05, "loss": 3.2676822662353517, "step": 19570 }, { "epoch": 0.1580523558518925, "grad_norm": 0.8945382237434387, "learning_rate": 1.684999555745822e-05, "loss": 3.494029235839844, "step": 19580 }, { "epoch": 0.15813307717765956, "grad_norm": 1.3513885736465454, "learning_rate": 1.6848380087720007e-05, "loss": 3.555217742919922, "step": 19590 }, { "epoch": 0.15821379850342662, "grad_norm": 0.7651785612106323, "learning_rate": 1.6846764617981795e-05, "loss": 3.683179473876953, "step": 19600 }, { "epoch": 0.15829451982919368, "grad_norm": 1.2057559490203857, "learning_rate": 1.6845149148243583e-05, "loss": 3.103193664550781, "step": 19610 }, { "epoch": 0.15837524115496074, "grad_norm": 0.9697821140289307, "learning_rate": 1.6843533678505367e-05, "loss": 2.904193305969238, "step": 19620 }, { "epoch": 0.1584559624807278, "grad_norm": 0.8084566593170166, "learning_rate": 1.6841918208767155e-05, "loss": 3.1508110046386717, "step": 19630 }, { "epoch": 0.15853668380649483, "grad_norm": 1.0293446779251099, "learning_rate": 1.6840302739028943e-05, "loss": 3.298568344116211, "step": 19640 }, { "epoch": 0.15861740513226188, "grad_norm": 1.0505801439285278, "learning_rate": 1.683868726929073e-05, "loss": 3.2563861846923827, "step": 19650 }, { "epoch": 0.15869812645802894, "grad_norm": 1.832578182220459, "learning_rate": 1.6837071799552515e-05, "loss": 3.3264923095703125, "step": 19660 }, { "epoch": 0.158778847783796, "grad_norm": 0.6027687788009644, "learning_rate": 1.6835456329814302e-05, "loss": 3.222981643676758, "step": 19670 }, { "epoch": 0.15885956910956306, "grad_norm": 1.465952754020691, "learning_rate": 1.683384086007609e-05, "loss": 3.1496465682983397, "step": 19680 }, { "epoch": 0.15894029043533012, "grad_norm": 1.6200069189071655, "learning_rate": 1.6832225390337878e-05, "loss": 3.1017330169677733, "step": 19690 }, { "epoch": 0.15902101176109718, "grad_norm": 0.9089183807373047, "learning_rate": 1.6830609920599662e-05, "loss": 2.8876792907714846, "step": 19700 }, { "epoch": 0.1591017330868642, "grad_norm": 1.1105023622512817, "learning_rate": 1.682899445086145e-05, "loss": 3.297906494140625, "step": 19710 }, { "epoch": 0.15918245441263126, "grad_norm": 0.5586901903152466, "learning_rate": 1.6827378981123237e-05, "loss": 3.6278060913085937, "step": 19720 }, { "epoch": 0.15926317573839832, "grad_norm": 0.7859391570091248, "learning_rate": 1.6825763511385025e-05, "loss": 3.233092498779297, "step": 19730 }, { "epoch": 0.15934389706416538, "grad_norm": 1.2861372232437134, "learning_rate": 1.682414804164681e-05, "loss": 3.212546539306641, "step": 19740 }, { "epoch": 0.15942461838993244, "grad_norm": 1.165067195892334, "learning_rate": 1.6822532571908597e-05, "loss": 3.193472480773926, "step": 19750 }, { "epoch": 0.1595053397156995, "grad_norm": 1.0164883136749268, "learning_rate": 1.6820917102170385e-05, "loss": 3.2474315643310545, "step": 19760 }, { "epoch": 0.15958606104146655, "grad_norm": 0.7287377715110779, "learning_rate": 1.6819301632432172e-05, "loss": 3.041180229187012, "step": 19770 }, { "epoch": 0.1596667823672336, "grad_norm": 0.8245355486869812, "learning_rate": 1.6817686162693957e-05, "loss": 3.1545413970947265, "step": 19780 }, { "epoch": 0.15974750369300064, "grad_norm": 0.8644742965698242, "learning_rate": 1.6816070692955744e-05, "loss": 3.378403091430664, "step": 19790 }, { "epoch": 0.1598282250187677, "grad_norm": 6.826494216918945, "learning_rate": 1.6814455223217532e-05, "loss": 3.6404422760009765, "step": 19800 }, { "epoch": 0.15990894634453476, "grad_norm": 1.2687726020812988, "learning_rate": 1.681283975347932e-05, "loss": 3.204130172729492, "step": 19810 }, { "epoch": 0.15998966767030182, "grad_norm": 1.0930699110031128, "learning_rate": 1.6811224283741104e-05, "loss": 3.196832847595215, "step": 19820 }, { "epoch": 0.16007038899606887, "grad_norm": 0.5652492642402649, "learning_rate": 1.6809608814002895e-05, "loss": 3.000986671447754, "step": 19830 }, { "epoch": 0.16015111032183593, "grad_norm": 1.3549306392669678, "learning_rate": 1.680799334426468e-05, "loss": 3.219467544555664, "step": 19840 }, { "epoch": 0.160231831647603, "grad_norm": 0.6950950026512146, "learning_rate": 1.6806377874526467e-05, "loss": 3.2205699920654296, "step": 19850 }, { "epoch": 0.16031255297337005, "grad_norm": 0.9197918176651001, "learning_rate": 1.680476240478825e-05, "loss": 2.8521343231201173, "step": 19860 }, { "epoch": 0.16039327429913708, "grad_norm": 0.9054350852966309, "learning_rate": 1.6803146935050042e-05, "loss": 3.192903518676758, "step": 19870 }, { "epoch": 0.16047399562490414, "grad_norm": 0.5389941334724426, "learning_rate": 1.6801531465311827e-05, "loss": 3.2898433685302733, "step": 19880 }, { "epoch": 0.1605547169506712, "grad_norm": 1.0886353254318237, "learning_rate": 1.6799915995573614e-05, "loss": 3.4488754272460938, "step": 19890 }, { "epoch": 0.16063543827643825, "grad_norm": 1.0617002248764038, "learning_rate": 1.67983005258354e-05, "loss": 3.190596008300781, "step": 19900 }, { "epoch": 0.1607161596022053, "grad_norm": 1.7557629346847534, "learning_rate": 1.679668505609719e-05, "loss": 3.4609001159667967, "step": 19910 }, { "epoch": 0.16079688092797237, "grad_norm": 0.8463329672813416, "learning_rate": 1.6795069586358974e-05, "loss": 3.492962646484375, "step": 19920 }, { "epoch": 0.16087760225373943, "grad_norm": 1.1933101415634155, "learning_rate": 1.6793454116620762e-05, "loss": 3.790948486328125, "step": 19930 }, { "epoch": 0.16095832357950646, "grad_norm": 0.8159456253051758, "learning_rate": 1.6791838646882546e-05, "loss": 3.0586307525634764, "step": 19940 }, { "epoch": 0.16103904490527352, "grad_norm": 1.3423963785171509, "learning_rate": 1.6790223177144337e-05, "loss": 3.8583065032958985, "step": 19950 }, { "epoch": 0.16111976623104057, "grad_norm": 1.3057582378387451, "learning_rate": 1.678860770740612e-05, "loss": 4.036865615844727, "step": 19960 }, { "epoch": 0.16120048755680763, "grad_norm": 0.8265758752822876, "learning_rate": 1.678699223766791e-05, "loss": 3.4473037719726562, "step": 19970 }, { "epoch": 0.1612812088825747, "grad_norm": 1.0887067317962646, "learning_rate": 1.6785376767929697e-05, "loss": 3.548365020751953, "step": 19980 }, { "epoch": 0.16136193020834175, "grad_norm": 0.8550687432289124, "learning_rate": 1.6783761298191485e-05, "loss": 3.407130813598633, "step": 19990 }, { "epoch": 0.1614426515341088, "grad_norm": 0.76761794090271, "learning_rate": 1.678214582845327e-05, "loss": 3.2763980865478515, "step": 20000 }, { "epoch": 0.16152337285987586, "grad_norm": 1.490136742591858, "learning_rate": 1.6780530358715057e-05, "loss": 3.49829216003418, "step": 20010 }, { "epoch": 0.1616040941856429, "grad_norm": 0.8989242911338806, "learning_rate": 1.6778914888976844e-05, "loss": 3.1829267501831056, "step": 20020 }, { "epoch": 0.16168481551140995, "grad_norm": 1.2677061557769775, "learning_rate": 1.6777299419238632e-05, "loss": 3.222350311279297, "step": 20030 }, { "epoch": 0.161765536837177, "grad_norm": 0.8102962374687195, "learning_rate": 1.6775683949500416e-05, "loss": 3.1889364242553713, "step": 20040 }, { "epoch": 0.16184625816294407, "grad_norm": 0.9582985639572144, "learning_rate": 1.6774068479762204e-05, "loss": 2.9764520645141603, "step": 20050 }, { "epoch": 0.16192697948871113, "grad_norm": 1.3118009567260742, "learning_rate": 1.6772453010023992e-05, "loss": 3.618195343017578, "step": 20060 }, { "epoch": 0.16200770081447818, "grad_norm": 1.1386301517486572, "learning_rate": 1.677083754028578e-05, "loss": 3.272583770751953, "step": 20070 }, { "epoch": 0.16208842214024524, "grad_norm": 0.8781171441078186, "learning_rate": 1.6769222070547564e-05, "loss": 3.452257537841797, "step": 20080 }, { "epoch": 0.16216914346601227, "grad_norm": 1.1624895334243774, "learning_rate": 1.676760660080935e-05, "loss": 3.155894088745117, "step": 20090 }, { "epoch": 0.16224986479177933, "grad_norm": 1.1722028255462646, "learning_rate": 1.676599113107114e-05, "loss": 3.6315452575683596, "step": 20100 }, { "epoch": 0.1623305861175464, "grad_norm": 1.2486344575881958, "learning_rate": 1.6764375661332927e-05, "loss": 3.8960029602050783, "step": 20110 }, { "epoch": 0.16241130744331345, "grad_norm": 1.8575458526611328, "learning_rate": 1.676276019159471e-05, "loss": 3.8790401458740233, "step": 20120 }, { "epoch": 0.1624920287690805, "grad_norm": 1.5504173040390015, "learning_rate": 1.67611447218565e-05, "loss": 3.183320236206055, "step": 20130 }, { "epoch": 0.16257275009484756, "grad_norm": 0.6193904280662537, "learning_rate": 1.6759529252118286e-05, "loss": 3.0549354553222656, "step": 20140 }, { "epoch": 0.16265347142061462, "grad_norm": 1.3082395792007446, "learning_rate": 1.6757913782380074e-05, "loss": 3.463969421386719, "step": 20150 }, { "epoch": 0.16273419274638168, "grad_norm": 2.6513094902038574, "learning_rate": 1.675629831264186e-05, "loss": 3.255580520629883, "step": 20160 }, { "epoch": 0.1628149140721487, "grad_norm": 0.8769596219062805, "learning_rate": 1.6754682842903646e-05, "loss": 3.321358489990234, "step": 20170 }, { "epoch": 0.16289563539791577, "grad_norm": 1.286618947982788, "learning_rate": 1.6753067373165434e-05, "loss": 3.288553237915039, "step": 20180 }, { "epoch": 0.16297635672368282, "grad_norm": 0.7052440047264099, "learning_rate": 1.675145190342722e-05, "loss": 3.5751422882080077, "step": 20190 }, { "epoch": 0.16305707804944988, "grad_norm": 1.1612416505813599, "learning_rate": 1.6749836433689006e-05, "loss": 3.508290100097656, "step": 20200 }, { "epoch": 0.16313779937521694, "grad_norm": 1.2276661396026611, "learning_rate": 1.6748220963950794e-05, "loss": 2.86676025390625, "step": 20210 }, { "epoch": 0.163218520700984, "grad_norm": 0.7069766521453857, "learning_rate": 1.674660549421258e-05, "loss": 3.251699447631836, "step": 20220 }, { "epoch": 0.16329924202675106, "grad_norm": 1.1179205179214478, "learning_rate": 1.674499002447437e-05, "loss": 3.0572111129760744, "step": 20230 }, { "epoch": 0.16337996335251811, "grad_norm": 0.7651769518852234, "learning_rate": 1.6743374554736153e-05, "loss": 3.3384845733642576, "step": 20240 }, { "epoch": 0.16346068467828515, "grad_norm": 1.1613967418670654, "learning_rate": 1.674175908499794e-05, "loss": 3.433597946166992, "step": 20250 }, { "epoch": 0.1635414060040522, "grad_norm": 0.9028785228729248, "learning_rate": 1.674014361525973e-05, "loss": 2.9558218002319334, "step": 20260 }, { "epoch": 0.16362212732981926, "grad_norm": 0.9066702127456665, "learning_rate": 1.6738528145521516e-05, "loss": 3.5469337463378907, "step": 20270 }, { "epoch": 0.16370284865558632, "grad_norm": 1.2632380723953247, "learning_rate": 1.6736912675783304e-05, "loss": 3.6733829498291017, "step": 20280 }, { "epoch": 0.16378356998135338, "grad_norm": 1.5346622467041016, "learning_rate": 1.673529720604509e-05, "loss": 3.866115188598633, "step": 20290 }, { "epoch": 0.16386429130712044, "grad_norm": 0.7460582256317139, "learning_rate": 1.6733681736306876e-05, "loss": 3.2132518768310545, "step": 20300 }, { "epoch": 0.1639450126328875, "grad_norm": 0.8332839012145996, "learning_rate": 1.6732066266568664e-05, "loss": 3.0585454940795898, "step": 20310 }, { "epoch": 0.16402573395865452, "grad_norm": 0.840008556842804, "learning_rate": 1.673045079683045e-05, "loss": 3.1656116485595702, "step": 20320 }, { "epoch": 0.16410645528442158, "grad_norm": 1.0392301082611084, "learning_rate": 1.6728835327092236e-05, "loss": 3.1500879287719727, "step": 20330 }, { "epoch": 0.16418717661018864, "grad_norm": 1.0433626174926758, "learning_rate": 1.6727219857354023e-05, "loss": 4.230348205566406, "step": 20340 }, { "epoch": 0.1642678979359557, "grad_norm": 0.9067502617835999, "learning_rate": 1.672560438761581e-05, "loss": 4.014822006225586, "step": 20350 }, { "epoch": 0.16434861926172276, "grad_norm": 0.5844142436981201, "learning_rate": 1.67239889178776e-05, "loss": 3.5813571929931642, "step": 20360 }, { "epoch": 0.16442934058748981, "grad_norm": 0.6896055340766907, "learning_rate": 1.6722373448139383e-05, "loss": 3.40319938659668, "step": 20370 }, { "epoch": 0.16451006191325687, "grad_norm": 1.0679888725280762, "learning_rate": 1.672075797840117e-05, "loss": 3.0629968643188477, "step": 20380 }, { "epoch": 0.16459078323902393, "grad_norm": 1.7457795143127441, "learning_rate": 1.671914250866296e-05, "loss": 3.543509674072266, "step": 20390 }, { "epoch": 0.16467150456479096, "grad_norm": 1.0178465843200684, "learning_rate": 1.6717527038924746e-05, "loss": 2.9248125076293947, "step": 20400 }, { "epoch": 0.16475222589055802, "grad_norm": 0.9411331415176392, "learning_rate": 1.671591156918653e-05, "loss": 3.5539302825927734, "step": 20410 }, { "epoch": 0.16483294721632508, "grad_norm": 0.8522791266441345, "learning_rate": 1.6714296099448318e-05, "loss": 3.504026412963867, "step": 20420 }, { "epoch": 0.16491366854209213, "grad_norm": 1.2608697414398193, "learning_rate": 1.6712680629710106e-05, "loss": 2.7665122985839843, "step": 20430 }, { "epoch": 0.1649943898678592, "grad_norm": 1.1437433958053589, "learning_rate": 1.6711065159971894e-05, "loss": 3.041089630126953, "step": 20440 }, { "epoch": 0.16507511119362625, "grad_norm": 1.2362717390060425, "learning_rate": 1.6709449690233678e-05, "loss": 2.7480854034423827, "step": 20450 }, { "epoch": 0.1651558325193933, "grad_norm": 1.1031712293624878, "learning_rate": 1.6707834220495466e-05, "loss": 3.4253841400146485, "step": 20460 }, { "epoch": 0.16523655384516037, "grad_norm": 0.9559452533721924, "learning_rate": 1.6706218750757253e-05, "loss": 3.075661849975586, "step": 20470 }, { "epoch": 0.1653172751709274, "grad_norm": 1.0939202308654785, "learning_rate": 1.670460328101904e-05, "loss": 3.6010547637939454, "step": 20480 }, { "epoch": 0.16539799649669445, "grad_norm": 0.7594557404518127, "learning_rate": 1.6702987811280825e-05, "loss": 3.3993148803710938, "step": 20490 }, { "epoch": 0.1654787178224615, "grad_norm": 1.7584625482559204, "learning_rate": 1.6701372341542613e-05, "loss": 3.1432525634765627, "step": 20500 }, { "epoch": 0.16555943914822857, "grad_norm": 1.0694128274917603, "learning_rate": 1.66997568718044e-05, "loss": 2.7223066329956054, "step": 20510 }, { "epoch": 0.16564016047399563, "grad_norm": 0.9761273860931396, "learning_rate": 1.669814140206619e-05, "loss": 3.589689254760742, "step": 20520 }, { "epoch": 0.1657208817997627, "grad_norm": 0.697289764881134, "learning_rate": 1.6696525932327973e-05, "loss": 3.656343460083008, "step": 20530 }, { "epoch": 0.16580160312552975, "grad_norm": 1.1738007068634033, "learning_rate": 1.669491046258976e-05, "loss": 3.014327812194824, "step": 20540 }, { "epoch": 0.16588232445129678, "grad_norm": 1.147898554801941, "learning_rate": 1.6693294992851548e-05, "loss": 3.671358108520508, "step": 20550 }, { "epoch": 0.16596304577706383, "grad_norm": 0.7602994441986084, "learning_rate": 1.6691679523113336e-05, "loss": 3.9381317138671874, "step": 20560 }, { "epoch": 0.1660437671028309, "grad_norm": 0.890841543674469, "learning_rate": 1.669006405337512e-05, "loss": 3.831471633911133, "step": 20570 }, { "epoch": 0.16612448842859795, "grad_norm": 0.9891884922981262, "learning_rate": 1.6688448583636908e-05, "loss": 3.482648468017578, "step": 20580 }, { "epoch": 0.166205209754365, "grad_norm": 1.1265558004379272, "learning_rate": 1.6686833113898695e-05, "loss": 3.784354400634766, "step": 20590 }, { "epoch": 0.16628593108013207, "grad_norm": 0.9298624992370605, "learning_rate": 1.6685217644160483e-05, "loss": 3.233866500854492, "step": 20600 }, { "epoch": 0.16636665240589912, "grad_norm": 0.7540340423583984, "learning_rate": 1.6683602174422267e-05, "loss": 3.4391830444335936, "step": 20610 }, { "epoch": 0.16644737373166618, "grad_norm": 0.8399866819381714, "learning_rate": 1.6681986704684055e-05, "loss": 2.841143798828125, "step": 20620 }, { "epoch": 0.1665280950574332, "grad_norm": 0.7700679302215576, "learning_rate": 1.6680371234945843e-05, "loss": 3.405016326904297, "step": 20630 }, { "epoch": 0.16660881638320027, "grad_norm": 1.3598480224609375, "learning_rate": 1.667875576520763e-05, "loss": 3.053544807434082, "step": 20640 }, { "epoch": 0.16668953770896733, "grad_norm": 1.0750925540924072, "learning_rate": 1.6677140295469415e-05, "loss": 3.0662376403808596, "step": 20650 }, { "epoch": 0.16677025903473439, "grad_norm": 1.4102250337600708, "learning_rate": 1.6675524825731202e-05, "loss": 3.3239078521728516, "step": 20660 }, { "epoch": 0.16685098036050144, "grad_norm": 0.9275026321411133, "learning_rate": 1.667390935599299e-05, "loss": 2.9233890533447267, "step": 20670 }, { "epoch": 0.1669317016862685, "grad_norm": 0.7439488768577576, "learning_rate": 1.6672293886254778e-05, "loss": 3.1956083297729494, "step": 20680 }, { "epoch": 0.16701242301203556, "grad_norm": 1.1241710186004639, "learning_rate": 1.6670678416516562e-05, "loss": 3.370554733276367, "step": 20690 }, { "epoch": 0.1670931443378026, "grad_norm": 0.9804696440696716, "learning_rate": 1.6669062946778353e-05, "loss": 3.837424087524414, "step": 20700 }, { "epoch": 0.16717386566356965, "grad_norm": 1.3815747499465942, "learning_rate": 1.6667447477040138e-05, "loss": 3.057392883300781, "step": 20710 }, { "epoch": 0.1672545869893367, "grad_norm": 1.1049901247024536, "learning_rate": 1.6665832007301925e-05, "loss": 3.5404922485351564, "step": 20720 }, { "epoch": 0.16733530831510376, "grad_norm": 0.9142563343048096, "learning_rate": 1.666421653756371e-05, "loss": 2.860765838623047, "step": 20730 }, { "epoch": 0.16741602964087082, "grad_norm": 1.3383992910385132, "learning_rate": 1.66626010678255e-05, "loss": 3.2577754974365236, "step": 20740 }, { "epoch": 0.16749675096663788, "grad_norm": 1.3390038013458252, "learning_rate": 1.6660985598087285e-05, "loss": 3.1189123153686524, "step": 20750 }, { "epoch": 0.16757747229240494, "grad_norm": 0.6514583230018616, "learning_rate": 1.6659370128349073e-05, "loss": 3.396142578125, "step": 20760 }, { "epoch": 0.167658193618172, "grad_norm": 0.7346383333206177, "learning_rate": 1.6657754658610857e-05, "loss": 3.1865386962890625, "step": 20770 }, { "epoch": 0.16773891494393903, "grad_norm": 1.4548213481903076, "learning_rate": 1.6656139188872648e-05, "loss": 3.173826789855957, "step": 20780 }, { "epoch": 0.16781963626970609, "grad_norm": 0.9058336019515991, "learning_rate": 1.6654523719134432e-05, "loss": 3.315359878540039, "step": 20790 }, { "epoch": 0.16790035759547314, "grad_norm": 1.5698013305664062, "learning_rate": 1.665290824939622e-05, "loss": 3.066172790527344, "step": 20800 }, { "epoch": 0.1679810789212402, "grad_norm": 1.5300688743591309, "learning_rate": 1.6651292779658004e-05, "loss": 3.5743549346923826, "step": 20810 }, { "epoch": 0.16806180024700726, "grad_norm": 0.7724844813346863, "learning_rate": 1.6649677309919795e-05, "loss": 3.202031707763672, "step": 20820 }, { "epoch": 0.16814252157277432, "grad_norm": 0.8783177137374878, "learning_rate": 1.664806184018158e-05, "loss": 3.520656967163086, "step": 20830 }, { "epoch": 0.16822324289854138, "grad_norm": 0.7653866410255432, "learning_rate": 1.6646446370443367e-05, "loss": 3.0011627197265627, "step": 20840 }, { "epoch": 0.16830396422430843, "grad_norm": 0.5983294248580933, "learning_rate": 1.6644830900705155e-05, "loss": 3.3133171081542967, "step": 20850 }, { "epoch": 0.16838468555007546, "grad_norm": 0.9191200137138367, "learning_rate": 1.6643215430966943e-05, "loss": 3.152588653564453, "step": 20860 }, { "epoch": 0.16846540687584252, "grad_norm": 1.3569049835205078, "learning_rate": 1.6641599961228727e-05, "loss": 3.6427703857421876, "step": 20870 }, { "epoch": 0.16854612820160958, "grad_norm": 0.831195056438446, "learning_rate": 1.6639984491490515e-05, "loss": 3.1575510025024416, "step": 20880 }, { "epoch": 0.16862684952737664, "grad_norm": 0.9798173308372498, "learning_rate": 1.6638369021752302e-05, "loss": 3.4543331146240233, "step": 20890 }, { "epoch": 0.1687075708531437, "grad_norm": 0.7493632435798645, "learning_rate": 1.663675355201409e-05, "loss": 2.9897212982177734, "step": 20900 }, { "epoch": 0.16878829217891075, "grad_norm": 0.9904429316520691, "learning_rate": 1.6635138082275874e-05, "loss": 3.7103748321533203, "step": 20910 }, { "epoch": 0.1688690135046778, "grad_norm": 0.7666990756988525, "learning_rate": 1.6633522612537662e-05, "loss": 2.8384319305419923, "step": 20920 }, { "epoch": 0.16894973483044484, "grad_norm": 1.8214726448059082, "learning_rate": 1.663190714279945e-05, "loss": 3.1159772872924805, "step": 20930 }, { "epoch": 0.1690304561562119, "grad_norm": 1.9123831987380981, "learning_rate": 1.6630291673061238e-05, "loss": 3.3242828369140627, "step": 20940 }, { "epoch": 0.16911117748197896, "grad_norm": 0.8667173385620117, "learning_rate": 1.6628676203323022e-05, "loss": 3.579207992553711, "step": 20950 }, { "epoch": 0.16919189880774602, "grad_norm": 1.1364054679870605, "learning_rate": 1.662706073358481e-05, "loss": 3.628324508666992, "step": 20960 }, { "epoch": 0.16927262013351307, "grad_norm": 0.850311815738678, "learning_rate": 1.6625445263846597e-05, "loss": 3.1050390243530273, "step": 20970 }, { "epoch": 0.16935334145928013, "grad_norm": 0.9068384170532227, "learning_rate": 1.6623829794108385e-05, "loss": 3.1397817611694334, "step": 20980 }, { "epoch": 0.1694340627850472, "grad_norm": 0.772229790687561, "learning_rate": 1.662221432437017e-05, "loss": 3.1191837310791017, "step": 20990 }, { "epoch": 0.16951478411081425, "grad_norm": 1.213082194328308, "learning_rate": 1.6620598854631957e-05, "loss": 3.2565029144287108, "step": 21000 }, { "epoch": 0.16959550543658128, "grad_norm": 0.6476767063140869, "learning_rate": 1.6618983384893745e-05, "loss": 3.3663116455078126, "step": 21010 }, { "epoch": 0.16967622676234834, "grad_norm": 1.5524877309799194, "learning_rate": 1.6617367915155532e-05, "loss": 3.4828163146972657, "step": 21020 }, { "epoch": 0.1697569480881154, "grad_norm": 0.5453366041183472, "learning_rate": 1.6615752445417317e-05, "loss": 3.0890636444091797, "step": 21030 }, { "epoch": 0.16983766941388245, "grad_norm": 1.3386387825012207, "learning_rate": 1.6614136975679104e-05, "loss": 3.2824752807617186, "step": 21040 }, { "epoch": 0.1699183907396495, "grad_norm": 1.127640962600708, "learning_rate": 1.6612521505940892e-05, "loss": 3.469430160522461, "step": 21050 }, { "epoch": 0.16999911206541657, "grad_norm": 0.5342042446136475, "learning_rate": 1.661090603620268e-05, "loss": 3.5845268249511717, "step": 21060 }, { "epoch": 0.17007983339118363, "grad_norm": 0.713367223739624, "learning_rate": 1.6609290566464464e-05, "loss": 3.9411460876464846, "step": 21070 }, { "epoch": 0.17016055471695068, "grad_norm": 0.9650131464004517, "learning_rate": 1.660767509672625e-05, "loss": 3.6133499145507812, "step": 21080 }, { "epoch": 0.17024127604271772, "grad_norm": 0.7877477407455444, "learning_rate": 1.660605962698804e-05, "loss": 3.393380355834961, "step": 21090 }, { "epoch": 0.17032199736848477, "grad_norm": 0.8913084268569946, "learning_rate": 1.6604444157249827e-05, "loss": 3.0273998260498045, "step": 21100 }, { "epoch": 0.17040271869425183, "grad_norm": 0.914892852306366, "learning_rate": 1.660282868751161e-05, "loss": 3.18251895904541, "step": 21110 }, { "epoch": 0.1704834400200189, "grad_norm": 0.6923007965087891, "learning_rate": 1.66012132177734e-05, "loss": 3.2649349212646483, "step": 21120 }, { "epoch": 0.17056416134578595, "grad_norm": 0.7088398933410645, "learning_rate": 1.6599597748035187e-05, "loss": 3.1476423263549806, "step": 21130 }, { "epoch": 0.170644882671553, "grad_norm": 1.3429068326950073, "learning_rate": 1.6597982278296974e-05, "loss": 3.0634252548217775, "step": 21140 }, { "epoch": 0.17072560399732006, "grad_norm": 0.631168782711029, "learning_rate": 1.659636680855876e-05, "loss": 3.102588653564453, "step": 21150 }, { "epoch": 0.1708063253230871, "grad_norm": 1.1298773288726807, "learning_rate": 1.6594751338820546e-05, "loss": 3.0516645431518556, "step": 21160 }, { "epoch": 0.17088704664885415, "grad_norm": 1.4168529510498047, "learning_rate": 1.6593135869082334e-05, "loss": 3.5331722259521485, "step": 21170 }, { "epoch": 0.1709677679746212, "grad_norm": 0.9374449849128723, "learning_rate": 1.6591520399344122e-05, "loss": 3.5102344512939454, "step": 21180 }, { "epoch": 0.17104848930038827, "grad_norm": 1.0926251411437988, "learning_rate": 1.6589904929605906e-05, "loss": 3.4370101928710937, "step": 21190 }, { "epoch": 0.17112921062615533, "grad_norm": 0.6887526512145996, "learning_rate": 1.6588289459867694e-05, "loss": 3.1492977142333984, "step": 21200 }, { "epoch": 0.17120993195192238, "grad_norm": 0.9227898120880127, "learning_rate": 1.658667399012948e-05, "loss": 3.137685775756836, "step": 21210 }, { "epoch": 0.17129065327768944, "grad_norm": 0.7746596932411194, "learning_rate": 1.658505852039127e-05, "loss": 3.2691417694091798, "step": 21220 }, { "epoch": 0.1713713746034565, "grad_norm": 1.2804694175720215, "learning_rate": 1.6583443050653054e-05, "loss": 3.155120849609375, "step": 21230 }, { "epoch": 0.17145209592922353, "grad_norm": 0.9436755180358887, "learning_rate": 1.658182758091484e-05, "loss": 3.2187568664550783, "step": 21240 }, { "epoch": 0.1715328172549906, "grad_norm": 0.5613658428192139, "learning_rate": 1.658021211117663e-05, "loss": 3.1881740570068358, "step": 21250 }, { "epoch": 0.17161353858075765, "grad_norm": 0.7418566346168518, "learning_rate": 1.6578596641438417e-05, "loss": 3.5392276763916017, "step": 21260 }, { "epoch": 0.1716942599065247, "grad_norm": 0.6799067854881287, "learning_rate": 1.65769811717002e-05, "loss": 2.9334861755371096, "step": 21270 }, { "epoch": 0.17177498123229176, "grad_norm": 1.1851344108581543, "learning_rate": 1.657536570196199e-05, "loss": 3.299082565307617, "step": 21280 }, { "epoch": 0.17185570255805882, "grad_norm": 1.1253585815429688, "learning_rate": 1.6573750232223776e-05, "loss": 3.3084331512451173, "step": 21290 }, { "epoch": 0.17193642388382588, "grad_norm": 0.8601242303848267, "learning_rate": 1.6572134762485564e-05, "loss": 3.5055862426757813, "step": 21300 }, { "epoch": 0.1720171452095929, "grad_norm": 0.7246654033660889, "learning_rate": 1.6570519292747348e-05, "loss": 3.0907360076904298, "step": 21310 }, { "epoch": 0.17209786653535997, "grad_norm": 1.2041507959365845, "learning_rate": 1.6568903823009136e-05, "loss": 3.3997344970703125, "step": 21320 }, { "epoch": 0.17217858786112702, "grad_norm": 0.956130862236023, "learning_rate": 1.6567288353270924e-05, "loss": 3.026799964904785, "step": 21330 }, { "epoch": 0.17225930918689408, "grad_norm": 1.1205120086669922, "learning_rate": 1.656567288353271e-05, "loss": 3.3636016845703125, "step": 21340 }, { "epoch": 0.17234003051266114, "grad_norm": 0.9093880653381348, "learning_rate": 1.6564057413794496e-05, "loss": 3.0930458068847657, "step": 21350 }, { "epoch": 0.1724207518384282, "grad_norm": 0.6103731393814087, "learning_rate": 1.6562441944056283e-05, "loss": 3.5458297729492188, "step": 21360 }, { "epoch": 0.17250147316419526, "grad_norm": 0.8494190573692322, "learning_rate": 1.656082647431807e-05, "loss": 3.2814937591552735, "step": 21370 }, { "epoch": 0.17258219448996232, "grad_norm": 0.7368024587631226, "learning_rate": 1.655921100457986e-05, "loss": 3.243033218383789, "step": 21380 }, { "epoch": 0.17266291581572935, "grad_norm": 1.1294411420822144, "learning_rate": 1.6557595534841643e-05, "loss": 3.4421245574951174, "step": 21390 }, { "epoch": 0.1727436371414964, "grad_norm": 1.032360553741455, "learning_rate": 1.655598006510343e-05, "loss": 3.4541255950927736, "step": 21400 }, { "epoch": 0.17282435846726346, "grad_norm": 1.5454630851745605, "learning_rate": 1.655436459536522e-05, "loss": 2.9360923767089844, "step": 21410 }, { "epoch": 0.17290507979303052, "grad_norm": 1.0752345323562622, "learning_rate": 1.6552749125627006e-05, "loss": 3.162136268615723, "step": 21420 }, { "epoch": 0.17298580111879758, "grad_norm": 0.7194427251815796, "learning_rate": 1.655113365588879e-05, "loss": 3.418260955810547, "step": 21430 }, { "epoch": 0.17306652244456464, "grad_norm": 1.43379545211792, "learning_rate": 1.6549518186150578e-05, "loss": 2.7927040100097655, "step": 21440 }, { "epoch": 0.1731472437703317, "grad_norm": 1.1592222452163696, "learning_rate": 1.6547902716412366e-05, "loss": 3.060025215148926, "step": 21450 }, { "epoch": 0.17322796509609875, "grad_norm": 0.6758761405944824, "learning_rate": 1.6546287246674154e-05, "loss": 3.4825454711914063, "step": 21460 }, { "epoch": 0.17330868642186578, "grad_norm": 1.2687488794326782, "learning_rate": 1.6544671776935938e-05, "loss": 3.5971611022949217, "step": 21470 }, { "epoch": 0.17338940774763284, "grad_norm": 0.9699406027793884, "learning_rate": 1.6543056307197726e-05, "loss": 3.2979183197021484, "step": 21480 }, { "epoch": 0.1734701290733999, "grad_norm": 1.3085533380508423, "learning_rate": 1.6541440837459513e-05, "loss": 3.132694625854492, "step": 21490 }, { "epoch": 0.17355085039916696, "grad_norm": 1.0244072675704956, "learning_rate": 1.65398253677213e-05, "loss": 3.3357177734375, "step": 21500 }, { "epoch": 0.17363157172493401, "grad_norm": 1.6135064363479614, "learning_rate": 1.6538209897983085e-05, "loss": 3.0929729461669924, "step": 21510 }, { "epoch": 0.17371229305070107, "grad_norm": 1.1204811334609985, "learning_rate": 1.6536594428244873e-05, "loss": 2.8469676971435547, "step": 21520 }, { "epoch": 0.17379301437646813, "grad_norm": 0.9788176417350769, "learning_rate": 1.653497895850666e-05, "loss": 3.374275970458984, "step": 21530 }, { "epoch": 0.17387373570223516, "grad_norm": 1.346339464187622, "learning_rate": 1.6533363488768448e-05, "loss": 3.164939308166504, "step": 21540 }, { "epoch": 0.17395445702800222, "grad_norm": 0.7750789523124695, "learning_rate": 1.6531748019030236e-05, "loss": 3.0845272064208986, "step": 21550 }, { "epoch": 0.17403517835376928, "grad_norm": 1.7149237394332886, "learning_rate": 1.653013254929202e-05, "loss": 3.6220226287841797, "step": 21560 }, { "epoch": 0.17411589967953633, "grad_norm": 0.7492220401763916, "learning_rate": 1.652851707955381e-05, "loss": 2.9243047714233397, "step": 21570 }, { "epoch": 0.1741966210053034, "grad_norm": 1.0561540126800537, "learning_rate": 1.6526901609815596e-05, "loss": 3.428772735595703, "step": 21580 }, { "epoch": 0.17427734233107045, "grad_norm": 1.4390449523925781, "learning_rate": 1.6525286140077383e-05, "loss": 3.194455146789551, "step": 21590 }, { "epoch": 0.1743580636568375, "grad_norm": 1.5873535871505737, "learning_rate": 1.6523670670339168e-05, "loss": 3.244717025756836, "step": 21600 }, { "epoch": 0.17443878498260457, "grad_norm": 1.0045137405395508, "learning_rate": 1.652205520060096e-05, "loss": 3.0455814361572267, "step": 21610 }, { "epoch": 0.1745195063083716, "grad_norm": 0.952263593673706, "learning_rate": 1.6520439730862743e-05, "loss": 2.8838827133178713, "step": 21620 }, { "epoch": 0.17460022763413865, "grad_norm": 0.8145366311073303, "learning_rate": 1.651882426112453e-05, "loss": 3.4602012634277344, "step": 21630 }, { "epoch": 0.1746809489599057, "grad_norm": 1.0684934854507446, "learning_rate": 1.6517208791386315e-05, "loss": 3.221847152709961, "step": 21640 }, { "epoch": 0.17476167028567277, "grad_norm": 1.1157050132751465, "learning_rate": 1.6515593321648106e-05, "loss": 3.511288070678711, "step": 21650 }, { "epoch": 0.17484239161143983, "grad_norm": 0.9232501983642578, "learning_rate": 1.651397785190989e-05, "loss": 3.0443801879882812, "step": 21660 }, { "epoch": 0.1749231129372069, "grad_norm": 0.6878831386566162, "learning_rate": 1.6512362382171678e-05, "loss": 3.4993331909179686, "step": 21670 }, { "epoch": 0.17500383426297395, "grad_norm": 1.2140640020370483, "learning_rate": 1.6510746912433462e-05, "loss": 3.2533416748046875, "step": 21680 }, { "epoch": 0.175084555588741, "grad_norm": 0.8987119197845459, "learning_rate": 1.6509131442695253e-05, "loss": 3.0548398971557615, "step": 21690 }, { "epoch": 0.17516527691450803, "grad_norm": 0.6711673736572266, "learning_rate": 1.6507515972957038e-05, "loss": 3.420939254760742, "step": 21700 }, { "epoch": 0.1752459982402751, "grad_norm": 0.7048670649528503, "learning_rate": 1.6505900503218825e-05, "loss": 3.268508529663086, "step": 21710 }, { "epoch": 0.17532671956604215, "grad_norm": 0.8688015937805176, "learning_rate": 1.6504285033480613e-05, "loss": 2.784251403808594, "step": 21720 }, { "epoch": 0.1754074408918092, "grad_norm": 1.1497397422790527, "learning_rate": 1.65026695637424e-05, "loss": 3.313123321533203, "step": 21730 }, { "epoch": 0.17548816221757627, "grad_norm": 1.2171725034713745, "learning_rate": 1.6501054094004185e-05, "loss": 3.407034683227539, "step": 21740 }, { "epoch": 0.17556888354334332, "grad_norm": 1.027492880821228, "learning_rate": 1.6499438624265973e-05, "loss": 3.0407735824584963, "step": 21750 }, { "epoch": 0.17564960486911038, "grad_norm": 1.2362136840820312, "learning_rate": 1.649782315452776e-05, "loss": 2.957407760620117, "step": 21760 }, { "epoch": 0.1757303261948774, "grad_norm": 1.1152188777923584, "learning_rate": 1.6496207684789548e-05, "loss": 3.6715316772460938, "step": 21770 }, { "epoch": 0.17581104752064447, "grad_norm": 0.6362349987030029, "learning_rate": 1.6494592215051333e-05, "loss": 2.888118553161621, "step": 21780 }, { "epoch": 0.17589176884641153, "grad_norm": 0.5618848204612732, "learning_rate": 1.649297674531312e-05, "loss": 3.1548641204833983, "step": 21790 }, { "epoch": 0.1759724901721786, "grad_norm": 0.8109168410301208, "learning_rate": 1.6491361275574908e-05, "loss": 3.9122783660888674, "step": 21800 }, { "epoch": 0.17605321149794564, "grad_norm": 0.659801185131073, "learning_rate": 1.6489745805836696e-05, "loss": 3.2121063232421876, "step": 21810 }, { "epoch": 0.1761339328237127, "grad_norm": 0.9492382407188416, "learning_rate": 1.648813033609848e-05, "loss": 2.808736801147461, "step": 21820 }, { "epoch": 0.17621465414947976, "grad_norm": 0.8573606014251709, "learning_rate": 1.6486514866360268e-05, "loss": 3.109349250793457, "step": 21830 }, { "epoch": 0.17629537547524682, "grad_norm": 0.7768137454986572, "learning_rate": 1.6484899396622055e-05, "loss": 3.2198040008544924, "step": 21840 }, { "epoch": 0.17637609680101385, "grad_norm": 1.006259560585022, "learning_rate": 1.6483283926883843e-05, "loss": 3.015010452270508, "step": 21850 }, { "epoch": 0.1764568181267809, "grad_norm": 1.2714483737945557, "learning_rate": 1.6481668457145627e-05, "loss": 3.1534976959228516, "step": 21860 }, { "epoch": 0.17653753945254796, "grad_norm": 0.7221633791923523, "learning_rate": 1.6480052987407415e-05, "loss": 3.055453872680664, "step": 21870 }, { "epoch": 0.17661826077831502, "grad_norm": 0.6501348614692688, "learning_rate": 1.6478437517669203e-05, "loss": 3.392421340942383, "step": 21880 }, { "epoch": 0.17669898210408208, "grad_norm": 0.8529218435287476, "learning_rate": 1.647682204793099e-05, "loss": 3.1969696044921876, "step": 21890 }, { "epoch": 0.17677970342984914, "grad_norm": 1.7984955310821533, "learning_rate": 1.6475206578192775e-05, "loss": 3.3034435272216798, "step": 21900 }, { "epoch": 0.1768604247556162, "grad_norm": 0.8255242705345154, "learning_rate": 1.6473591108454562e-05, "loss": 3.2175823211669923, "step": 21910 }, { "epoch": 0.17694114608138323, "grad_norm": 0.8038454651832581, "learning_rate": 1.647197563871635e-05, "loss": 3.1327985763549804, "step": 21920 }, { "epoch": 0.17702186740715029, "grad_norm": 0.9322090148925781, "learning_rate": 1.6470360168978138e-05, "loss": 2.938159942626953, "step": 21930 }, { "epoch": 0.17710258873291734, "grad_norm": 0.9858022332191467, "learning_rate": 1.6468744699239922e-05, "loss": 3.184012222290039, "step": 21940 }, { "epoch": 0.1771833100586844, "grad_norm": 1.0030417442321777, "learning_rate": 1.646712922950171e-05, "loss": 3.2370559692382814, "step": 21950 }, { "epoch": 0.17726403138445146, "grad_norm": 1.121917963027954, "learning_rate": 1.6465513759763497e-05, "loss": 3.581100845336914, "step": 21960 }, { "epoch": 0.17734475271021852, "grad_norm": 0.6965534687042236, "learning_rate": 1.6463898290025285e-05, "loss": 3.042466163635254, "step": 21970 }, { "epoch": 0.17742547403598558, "grad_norm": 1.3194071054458618, "learning_rate": 1.646228282028707e-05, "loss": 3.580617904663086, "step": 21980 }, { "epoch": 0.17750619536175263, "grad_norm": 0.9806872010231018, "learning_rate": 1.6460667350548857e-05, "loss": 3.3253761291503907, "step": 21990 }, { "epoch": 0.17758691668751966, "grad_norm": 0.6343050599098206, "learning_rate": 1.6459051880810645e-05, "loss": 2.865622329711914, "step": 22000 }, { "epoch": 0.17766763801328672, "grad_norm": 1.2889944314956665, "learning_rate": 1.6457436411072433e-05, "loss": 3.1947048187255858, "step": 22010 }, { "epoch": 0.17774835933905378, "grad_norm": 0.9412117004394531, "learning_rate": 1.6455820941334217e-05, "loss": 2.769874382019043, "step": 22020 }, { "epoch": 0.17782908066482084, "grad_norm": 0.7587072849273682, "learning_rate": 1.6454205471596005e-05, "loss": 3.056243133544922, "step": 22030 }, { "epoch": 0.1779098019905879, "grad_norm": 0.794593095779419, "learning_rate": 1.6452590001857792e-05, "loss": 3.3681217193603517, "step": 22040 }, { "epoch": 0.17799052331635495, "grad_norm": 0.9670451283454895, "learning_rate": 1.645097453211958e-05, "loss": 3.034769058227539, "step": 22050 }, { "epoch": 0.178071244642122, "grad_norm": 0.8511298298835754, "learning_rate": 1.6449359062381364e-05, "loss": 3.501474380493164, "step": 22060 }, { "epoch": 0.17815196596788907, "grad_norm": 0.7810804843902588, "learning_rate": 1.6447743592643152e-05, "loss": 3.6337928771972656, "step": 22070 }, { "epoch": 0.1782326872936561, "grad_norm": 0.8042369484901428, "learning_rate": 1.644612812290494e-05, "loss": 2.839668083190918, "step": 22080 }, { "epoch": 0.17831340861942316, "grad_norm": 0.739065408706665, "learning_rate": 1.6444512653166727e-05, "loss": 3.1931509017944335, "step": 22090 }, { "epoch": 0.17839412994519022, "grad_norm": 1.523085355758667, "learning_rate": 1.644289718342851e-05, "loss": 3.438343811035156, "step": 22100 }, { "epoch": 0.17847485127095727, "grad_norm": 1.002699613571167, "learning_rate": 1.64412817136903e-05, "loss": 3.048117828369141, "step": 22110 }, { "epoch": 0.17855557259672433, "grad_norm": 0.6156594157218933, "learning_rate": 1.6439666243952087e-05, "loss": 3.534493637084961, "step": 22120 }, { "epoch": 0.1786362939224914, "grad_norm": 0.661725640296936, "learning_rate": 1.6438050774213875e-05, "loss": 3.676182174682617, "step": 22130 }, { "epoch": 0.17871701524825845, "grad_norm": 0.7463036179542542, "learning_rate": 1.643643530447566e-05, "loss": 3.237315368652344, "step": 22140 }, { "epoch": 0.17879773657402548, "grad_norm": 1.0246258974075317, "learning_rate": 1.6434819834737447e-05, "loss": 3.3925827026367186, "step": 22150 }, { "epoch": 0.17887845789979254, "grad_norm": 1.1777102947235107, "learning_rate": 1.6433204364999234e-05, "loss": 3.099839782714844, "step": 22160 }, { "epoch": 0.1789591792255596, "grad_norm": 0.6973580718040466, "learning_rate": 1.6431588895261022e-05, "loss": 3.6139678955078125, "step": 22170 }, { "epoch": 0.17903990055132665, "grad_norm": 1.112760305404663, "learning_rate": 1.6429973425522806e-05, "loss": 3.4504924774169923, "step": 22180 }, { "epoch": 0.1791206218770937, "grad_norm": 0.5328574180603027, "learning_rate": 1.6428357955784594e-05, "loss": 3.0111534118652346, "step": 22190 }, { "epoch": 0.17920134320286077, "grad_norm": 0.6765215396881104, "learning_rate": 1.6426742486046382e-05, "loss": 3.283885955810547, "step": 22200 }, { "epoch": 0.17928206452862783, "grad_norm": 1.085203766822815, "learning_rate": 1.642512701630817e-05, "loss": 2.802008628845215, "step": 22210 }, { "epoch": 0.17936278585439488, "grad_norm": 1.3513435125350952, "learning_rate": 1.6423511546569954e-05, "loss": 4.051167297363281, "step": 22220 }, { "epoch": 0.17944350718016192, "grad_norm": 0.7240586280822754, "learning_rate": 1.642189607683174e-05, "loss": 3.6179279327392577, "step": 22230 }, { "epoch": 0.17952422850592897, "grad_norm": 1.2410447597503662, "learning_rate": 1.642028060709353e-05, "loss": 3.1380966186523436, "step": 22240 }, { "epoch": 0.17960494983169603, "grad_norm": 0.5229572653770447, "learning_rate": 1.6418665137355317e-05, "loss": 3.1130178451538084, "step": 22250 }, { "epoch": 0.1796856711574631, "grad_norm": 0.5413787961006165, "learning_rate": 1.64170496676171e-05, "loss": 3.08884220123291, "step": 22260 }, { "epoch": 0.17976639248323015, "grad_norm": 1.057869553565979, "learning_rate": 1.641543419787889e-05, "loss": 3.3801429748535154, "step": 22270 }, { "epoch": 0.1798471138089972, "grad_norm": 0.8062539100646973, "learning_rate": 1.6413818728140677e-05, "loss": 3.159362030029297, "step": 22280 }, { "epoch": 0.17992783513476426, "grad_norm": 1.3301448822021484, "learning_rate": 1.6412203258402464e-05, "loss": 3.2365345001220702, "step": 22290 }, { "epoch": 0.18000855646053132, "grad_norm": 1.3056679964065552, "learning_rate": 1.641058778866425e-05, "loss": 3.2323444366455076, "step": 22300 }, { "epoch": 0.18008927778629835, "grad_norm": 1.138380765914917, "learning_rate": 1.6408972318926036e-05, "loss": 3.701898193359375, "step": 22310 }, { "epoch": 0.1801699991120654, "grad_norm": 0.9556983113288879, "learning_rate": 1.6407356849187824e-05, "loss": 3.3343128204345702, "step": 22320 }, { "epoch": 0.18025072043783247, "grad_norm": 0.9000096917152405, "learning_rate": 1.640574137944961e-05, "loss": 3.229711151123047, "step": 22330 }, { "epoch": 0.18033144176359953, "grad_norm": 0.9711746573448181, "learning_rate": 1.6404125909711396e-05, "loss": 3.5671127319335936, "step": 22340 }, { "epoch": 0.18041216308936658, "grad_norm": 0.5277588367462158, "learning_rate": 1.6402510439973184e-05, "loss": 3.3099666595458985, "step": 22350 }, { "epoch": 0.18049288441513364, "grad_norm": 0.9452173709869385, "learning_rate": 1.640089497023497e-05, "loss": 3.3496959686279295, "step": 22360 }, { "epoch": 0.1805736057409007, "grad_norm": 0.8285601735115051, "learning_rate": 1.639927950049676e-05, "loss": 3.986601638793945, "step": 22370 }, { "epoch": 0.18065432706666773, "grad_norm": 1.1123019456863403, "learning_rate": 1.6397664030758543e-05, "loss": 3.2961959838867188, "step": 22380 }, { "epoch": 0.1807350483924348, "grad_norm": 0.7558665871620178, "learning_rate": 1.639604856102033e-05, "loss": 3.637830352783203, "step": 22390 }, { "epoch": 0.18081576971820185, "grad_norm": 0.7559013962745667, "learning_rate": 1.639443309128212e-05, "loss": 3.148690414428711, "step": 22400 }, { "epoch": 0.1808964910439689, "grad_norm": 1.1438500881195068, "learning_rate": 1.6392817621543906e-05, "loss": 2.871378517150879, "step": 22410 }, { "epoch": 0.18097721236973596, "grad_norm": 1.0101439952850342, "learning_rate": 1.639120215180569e-05, "loss": 3.220087432861328, "step": 22420 }, { "epoch": 0.18105793369550302, "grad_norm": 0.9094130992889404, "learning_rate": 1.638958668206748e-05, "loss": 3.0149663925170898, "step": 22430 }, { "epoch": 0.18113865502127008, "grad_norm": 1.0521166324615479, "learning_rate": 1.6387971212329266e-05, "loss": 3.1602556228637697, "step": 22440 }, { "epoch": 0.18121937634703714, "grad_norm": 0.6138969659805298, "learning_rate": 1.6386355742591054e-05, "loss": 3.308550262451172, "step": 22450 }, { "epoch": 0.18130009767280417, "grad_norm": 0.6721711754798889, "learning_rate": 1.6384740272852838e-05, "loss": 3.0746315002441404, "step": 22460 }, { "epoch": 0.18138081899857122, "grad_norm": 0.661386251449585, "learning_rate": 1.6383124803114626e-05, "loss": 2.993696594238281, "step": 22470 }, { "epoch": 0.18146154032433828, "grad_norm": 1.4415018558502197, "learning_rate": 1.6381509333376413e-05, "loss": 3.5221046447753905, "step": 22480 }, { "epoch": 0.18154226165010534, "grad_norm": 0.7495632171630859, "learning_rate": 1.63798938636382e-05, "loss": 3.364022064208984, "step": 22490 }, { "epoch": 0.1816229829758724, "grad_norm": 2.063483953475952, "learning_rate": 1.6378278393899985e-05, "loss": 3.1761138916015623, "step": 22500 }, { "epoch": 0.18170370430163946, "grad_norm": 1.576633334159851, "learning_rate": 1.6376662924161773e-05, "loss": 3.9171306610107424, "step": 22510 }, { "epoch": 0.18178442562740652, "grad_norm": 0.9408772587776184, "learning_rate": 1.637504745442356e-05, "loss": 3.2776630401611326, "step": 22520 }, { "epoch": 0.18186514695317355, "grad_norm": 0.7368385195732117, "learning_rate": 1.637343198468535e-05, "loss": 3.8409500122070312, "step": 22530 }, { "epoch": 0.1819458682789406, "grad_norm": 0.9993413686752319, "learning_rate": 1.6371816514947133e-05, "loss": 3.8870052337646483, "step": 22540 }, { "epoch": 0.18202658960470766, "grad_norm": 1.5259069204330444, "learning_rate": 1.637020104520892e-05, "loss": 2.733789825439453, "step": 22550 }, { "epoch": 0.18210731093047472, "grad_norm": 0.8871632218360901, "learning_rate": 1.6368585575470708e-05, "loss": 2.7597173690795898, "step": 22560 }, { "epoch": 0.18218803225624178, "grad_norm": 0.8959642052650452, "learning_rate": 1.6366970105732496e-05, "loss": 3.2344680786132813, "step": 22570 }, { "epoch": 0.18226875358200884, "grad_norm": 0.9778908491134644, "learning_rate": 1.636535463599428e-05, "loss": 3.1553232192993166, "step": 22580 }, { "epoch": 0.1823494749077759, "grad_norm": 1.5426729917526245, "learning_rate": 1.636373916625607e-05, "loss": 3.1936418533325197, "step": 22590 }, { "epoch": 0.18243019623354295, "grad_norm": 1.1918787956237793, "learning_rate": 1.6362123696517856e-05, "loss": 2.6734447479248047, "step": 22600 }, { "epoch": 0.18251091755930998, "grad_norm": 1.1448441743850708, "learning_rate": 1.6360508226779643e-05, "loss": 2.738094520568848, "step": 22610 }, { "epoch": 0.18259163888507704, "grad_norm": 0.9875615835189819, "learning_rate": 1.6358892757041428e-05, "loss": 3.110544967651367, "step": 22620 }, { "epoch": 0.1826723602108441, "grad_norm": 0.7834101915359497, "learning_rate": 1.635727728730322e-05, "loss": 3.3021270751953127, "step": 22630 }, { "epoch": 0.18275308153661116, "grad_norm": 0.9837089776992798, "learning_rate": 1.6355661817565003e-05, "loss": 3.3796432495117186, "step": 22640 }, { "epoch": 0.18283380286237821, "grad_norm": 0.7709181904792786, "learning_rate": 1.635404634782679e-05, "loss": 3.306754302978516, "step": 22650 }, { "epoch": 0.18291452418814527, "grad_norm": 1.1870381832122803, "learning_rate": 1.6352430878088575e-05, "loss": 2.8838781356811523, "step": 22660 }, { "epoch": 0.18299524551391233, "grad_norm": 0.7058989405632019, "learning_rate": 1.6350815408350366e-05, "loss": 3.231284332275391, "step": 22670 }, { "epoch": 0.1830759668396794, "grad_norm": 1.3017247915267944, "learning_rate": 1.634919993861215e-05, "loss": 2.8000869750976562, "step": 22680 }, { "epoch": 0.18315668816544642, "grad_norm": 0.6715003252029419, "learning_rate": 1.6347584468873938e-05, "loss": 3.4300987243652346, "step": 22690 }, { "epoch": 0.18323740949121348, "grad_norm": 1.2221851348876953, "learning_rate": 1.6345968999135722e-05, "loss": 2.9692188262939454, "step": 22700 }, { "epoch": 0.18331813081698053, "grad_norm": 1.1306012868881226, "learning_rate": 1.6344353529397513e-05, "loss": 3.3727657318115236, "step": 22710 }, { "epoch": 0.1833988521427476, "grad_norm": 0.7851231098175049, "learning_rate": 1.6342738059659298e-05, "loss": 3.6877410888671873, "step": 22720 }, { "epoch": 0.18347957346851465, "grad_norm": 0.7826153635978699, "learning_rate": 1.6341122589921085e-05, "loss": 3.1721248626708984, "step": 22730 }, { "epoch": 0.1835602947942817, "grad_norm": 0.8902104496955872, "learning_rate": 1.633950712018287e-05, "loss": 3.786595916748047, "step": 22740 }, { "epoch": 0.18364101612004877, "grad_norm": 0.979967474937439, "learning_rate": 1.633789165044466e-05, "loss": 3.191755485534668, "step": 22750 }, { "epoch": 0.1837217374458158, "grad_norm": 0.7230592370033264, "learning_rate": 1.6336276180706445e-05, "loss": 3.386301803588867, "step": 22760 }, { "epoch": 0.18380245877158286, "grad_norm": 0.999381422996521, "learning_rate": 1.6334660710968233e-05, "loss": 2.8863994598388674, "step": 22770 }, { "epoch": 0.1838831800973499, "grad_norm": 0.7198321223258972, "learning_rate": 1.633304524123002e-05, "loss": 3.043731689453125, "step": 22780 }, { "epoch": 0.18396390142311697, "grad_norm": 0.7551234364509583, "learning_rate": 1.6331429771491808e-05, "loss": 3.4909652709960937, "step": 22790 }, { "epoch": 0.18404462274888403, "grad_norm": 1.4334248304367065, "learning_rate": 1.6329814301753596e-05, "loss": 3.287279510498047, "step": 22800 }, { "epoch": 0.1841253440746511, "grad_norm": 2.9865729808807373, "learning_rate": 1.632819883201538e-05, "loss": 3.182158660888672, "step": 22810 }, { "epoch": 0.18420606540041815, "grad_norm": 1.487412452697754, "learning_rate": 1.6326583362277168e-05, "loss": 3.2793563842773437, "step": 22820 }, { "epoch": 0.1842867867261852, "grad_norm": 1.0802431106567383, "learning_rate": 1.6324967892538956e-05, "loss": 3.4768585205078124, "step": 22830 }, { "epoch": 0.18436750805195223, "grad_norm": 1.0249156951904297, "learning_rate": 1.6323352422800743e-05, "loss": 3.241486740112305, "step": 22840 }, { "epoch": 0.1844482293777193, "grad_norm": 0.9691365957260132, "learning_rate": 1.6321736953062528e-05, "loss": 2.9336109161376953, "step": 22850 }, { "epoch": 0.18452895070348635, "grad_norm": 0.9236167669296265, "learning_rate": 1.6320121483324315e-05, "loss": 2.9511974334716795, "step": 22860 }, { "epoch": 0.1846096720292534, "grad_norm": 0.7641038298606873, "learning_rate": 1.6318506013586103e-05, "loss": 3.329667663574219, "step": 22870 }, { "epoch": 0.18469039335502047, "grad_norm": 1.0347670316696167, "learning_rate": 1.631689054384789e-05, "loss": 2.858465385437012, "step": 22880 }, { "epoch": 0.18477111468078752, "grad_norm": 1.6376934051513672, "learning_rate": 1.6315275074109675e-05, "loss": 2.953977584838867, "step": 22890 }, { "epoch": 0.18485183600655458, "grad_norm": 1.0427852869033813, "learning_rate": 1.6313659604371463e-05, "loss": 3.1269664764404297, "step": 22900 }, { "epoch": 0.18493255733232164, "grad_norm": 1.3630297183990479, "learning_rate": 1.631204413463325e-05, "loss": 3.3068973541259767, "step": 22910 }, { "epoch": 0.18501327865808867, "grad_norm": 0.7793409824371338, "learning_rate": 1.6310428664895038e-05, "loss": 3.028607940673828, "step": 22920 }, { "epoch": 0.18509399998385573, "grad_norm": 1.062882423400879, "learning_rate": 1.6308813195156822e-05, "loss": 3.012635612487793, "step": 22930 }, { "epoch": 0.1851747213096228, "grad_norm": 0.6542176008224487, "learning_rate": 1.630719772541861e-05, "loss": 3.881139373779297, "step": 22940 }, { "epoch": 0.18525544263538984, "grad_norm": 1.401008129119873, "learning_rate": 1.6305582255680398e-05, "loss": 3.6553558349609374, "step": 22950 }, { "epoch": 0.1853361639611569, "grad_norm": 1.1879181861877441, "learning_rate": 1.6303966785942185e-05, "loss": 3.1020252227783205, "step": 22960 }, { "epoch": 0.18541688528692396, "grad_norm": 1.0697535276412964, "learning_rate": 1.630235131620397e-05, "loss": 3.2910438537597657, "step": 22970 }, { "epoch": 0.18549760661269102, "grad_norm": 0.9822960495948792, "learning_rate": 1.6300735846465757e-05, "loss": 3.0218221664428713, "step": 22980 }, { "epoch": 0.18557832793845805, "grad_norm": 1.0868964195251465, "learning_rate": 1.6299120376727545e-05, "loss": 3.4678409576416014, "step": 22990 }, { "epoch": 0.1856590492642251, "grad_norm": 0.965099036693573, "learning_rate": 1.6297504906989333e-05, "loss": 3.281504440307617, "step": 23000 }, { "epoch": 0.18573977058999216, "grad_norm": 0.6640635132789612, "learning_rate": 1.6295889437251117e-05, "loss": 2.9586410522460938, "step": 23010 }, { "epoch": 0.18582049191575922, "grad_norm": 1.1377053260803223, "learning_rate": 1.6294273967512905e-05, "loss": 3.762738800048828, "step": 23020 }, { "epoch": 0.18590121324152628, "grad_norm": 1.018424391746521, "learning_rate": 1.6292658497774693e-05, "loss": 3.177113723754883, "step": 23030 }, { "epoch": 0.18598193456729334, "grad_norm": 0.8554106950759888, "learning_rate": 1.629104302803648e-05, "loss": 2.8380918502807617, "step": 23040 }, { "epoch": 0.1860626558930604, "grad_norm": 0.6359031796455383, "learning_rate": 1.6289427558298265e-05, "loss": 3.0716598510742186, "step": 23050 }, { "epoch": 0.18614337721882745, "grad_norm": 1.7280950546264648, "learning_rate": 1.6287812088560052e-05, "loss": 3.108026885986328, "step": 23060 }, { "epoch": 0.18622409854459449, "grad_norm": 0.9539675116539001, "learning_rate": 1.628619661882184e-05, "loss": 3.4889556884765627, "step": 23070 }, { "epoch": 0.18630481987036154, "grad_norm": 0.8052927851676941, "learning_rate": 1.6284581149083628e-05, "loss": 2.78271541595459, "step": 23080 }, { "epoch": 0.1863855411961286, "grad_norm": 1.2761740684509277, "learning_rate": 1.6282965679345412e-05, "loss": 2.957712745666504, "step": 23090 }, { "epoch": 0.18646626252189566, "grad_norm": 0.6809861063957214, "learning_rate": 1.62813502096072e-05, "loss": 3.050462341308594, "step": 23100 }, { "epoch": 0.18654698384766272, "grad_norm": 0.6944830417633057, "learning_rate": 1.6279734739868987e-05, "loss": 3.6789493560791016, "step": 23110 }, { "epoch": 0.18662770517342978, "grad_norm": 1.988194227218628, "learning_rate": 1.6278119270130775e-05, "loss": 3.3488204956054686, "step": 23120 }, { "epoch": 0.18670842649919683, "grad_norm": 1.3020676374435425, "learning_rate": 1.627650380039256e-05, "loss": 3.380087661743164, "step": 23130 }, { "epoch": 0.18678914782496386, "grad_norm": 0.9182770848274231, "learning_rate": 1.6274888330654347e-05, "loss": 3.489026641845703, "step": 23140 }, { "epoch": 0.18686986915073092, "grad_norm": 0.9295792579650879, "learning_rate": 1.6273272860916135e-05, "loss": 3.149000358581543, "step": 23150 }, { "epoch": 0.18695059047649798, "grad_norm": 1.0842448472976685, "learning_rate": 1.6271657391177922e-05, "loss": 3.5408321380615235, "step": 23160 }, { "epoch": 0.18703131180226504, "grad_norm": 1.1339493989944458, "learning_rate": 1.6270041921439707e-05, "loss": 3.185424041748047, "step": 23170 }, { "epoch": 0.1871120331280321, "grad_norm": 0.7753625512123108, "learning_rate": 1.6268426451701494e-05, "loss": 3.2642593383789062, "step": 23180 }, { "epoch": 0.18719275445379915, "grad_norm": 2.8987412452697754, "learning_rate": 1.6266810981963282e-05, "loss": 3.7103927612304686, "step": 23190 }, { "epoch": 0.1872734757795662, "grad_norm": 1.0363123416900635, "learning_rate": 1.626519551222507e-05, "loss": 3.12355899810791, "step": 23200 }, { "epoch": 0.18735419710533327, "grad_norm": 1.5194953680038452, "learning_rate": 1.6263580042486854e-05, "loss": 2.796257209777832, "step": 23210 }, { "epoch": 0.1874349184311003, "grad_norm": 0.9085275530815125, "learning_rate": 1.6261964572748642e-05, "loss": 3.357844924926758, "step": 23220 }, { "epoch": 0.18751563975686736, "grad_norm": 1.2938587665557861, "learning_rate": 1.626034910301043e-05, "loss": 3.3097557067871093, "step": 23230 }, { "epoch": 0.18759636108263442, "grad_norm": 1.1073318719863892, "learning_rate": 1.6258733633272217e-05, "loss": 2.8470577239990233, "step": 23240 }, { "epoch": 0.18767708240840147, "grad_norm": 1.172503113746643, "learning_rate": 1.6257118163534e-05, "loss": 3.538645935058594, "step": 23250 }, { "epoch": 0.18775780373416853, "grad_norm": 0.6866394281387329, "learning_rate": 1.625550269379579e-05, "loss": 3.3899436950683595, "step": 23260 }, { "epoch": 0.1878385250599356, "grad_norm": 0.8652214407920837, "learning_rate": 1.6253887224057577e-05, "loss": 3.2560630798339845, "step": 23270 }, { "epoch": 0.18791924638570265, "grad_norm": 0.7995895147323608, "learning_rate": 1.6252271754319365e-05, "loss": 3.2567523956298827, "step": 23280 }, { "epoch": 0.1879999677114697, "grad_norm": 0.8471545577049255, "learning_rate": 1.625065628458115e-05, "loss": 2.8751840591430664, "step": 23290 }, { "epoch": 0.18808068903723674, "grad_norm": 0.9804918169975281, "learning_rate": 1.6249040814842937e-05, "loss": 3.4038406372070313, "step": 23300 }, { "epoch": 0.1881614103630038, "grad_norm": 0.8654065132141113, "learning_rate": 1.6247425345104724e-05, "loss": 2.9842561721801757, "step": 23310 }, { "epoch": 0.18824213168877085, "grad_norm": 0.8100205063819885, "learning_rate": 1.6245809875366512e-05, "loss": 2.935525894165039, "step": 23320 }, { "epoch": 0.1883228530145379, "grad_norm": 0.9909689426422119, "learning_rate": 1.6244194405628296e-05, "loss": 3.0431222915649414, "step": 23330 }, { "epoch": 0.18840357434030497, "grad_norm": 1.1353240013122559, "learning_rate": 1.6242578935890084e-05, "loss": 3.0652372360229494, "step": 23340 }, { "epoch": 0.18848429566607203, "grad_norm": 0.7690233588218689, "learning_rate": 1.624096346615187e-05, "loss": 3.276839828491211, "step": 23350 }, { "epoch": 0.18856501699183909, "grad_norm": 0.8508498668670654, "learning_rate": 1.623934799641366e-05, "loss": 3.4843029022216796, "step": 23360 }, { "epoch": 0.18864573831760612, "grad_norm": 1.3157881498336792, "learning_rate": 1.6237732526675444e-05, "loss": 3.0534072875976563, "step": 23370 }, { "epoch": 0.18872645964337317, "grad_norm": 1.2340388298034668, "learning_rate": 1.623611705693723e-05, "loss": 2.8569435119628905, "step": 23380 }, { "epoch": 0.18880718096914023, "grad_norm": 1.551833152770996, "learning_rate": 1.623450158719902e-05, "loss": 3.524658203125, "step": 23390 }, { "epoch": 0.1888879022949073, "grad_norm": 0.8343998789787292, "learning_rate": 1.6232886117460807e-05, "loss": 3.2041000366210937, "step": 23400 }, { "epoch": 0.18896862362067435, "grad_norm": 0.633614182472229, "learning_rate": 1.623127064772259e-05, "loss": 2.720113754272461, "step": 23410 }, { "epoch": 0.1890493449464414, "grad_norm": 0.8508701324462891, "learning_rate": 1.622965517798438e-05, "loss": 2.8913267135620115, "step": 23420 }, { "epoch": 0.18913006627220846, "grad_norm": 1.1163283586502075, "learning_rate": 1.6228039708246166e-05, "loss": 3.2894664764404298, "step": 23430 }, { "epoch": 0.18921078759797552, "grad_norm": 0.8145689964294434, "learning_rate": 1.6226424238507954e-05, "loss": 3.2083293914794924, "step": 23440 }, { "epoch": 0.18929150892374255, "grad_norm": 1.0454039573669434, "learning_rate": 1.622480876876974e-05, "loss": 3.5545795440673826, "step": 23450 }, { "epoch": 0.1893722302495096, "grad_norm": 0.9795132279396057, "learning_rate": 1.622319329903153e-05, "loss": 3.1573554992675783, "step": 23460 }, { "epoch": 0.18945295157527667, "grad_norm": 1.3890411853790283, "learning_rate": 1.6221577829293314e-05, "loss": 2.7202054977416994, "step": 23470 }, { "epoch": 0.18953367290104373, "grad_norm": 0.6655047535896301, "learning_rate": 1.62199623595551e-05, "loss": 2.7982109069824217, "step": 23480 }, { "epoch": 0.18961439422681078, "grad_norm": 0.9373840093612671, "learning_rate": 1.6218346889816886e-05, "loss": 3.1865476608276366, "step": 23490 }, { "epoch": 0.18969511555257784, "grad_norm": 0.9271283149719238, "learning_rate": 1.6216731420078677e-05, "loss": 3.4473155975341796, "step": 23500 }, { "epoch": 0.1897758368783449, "grad_norm": 1.3385534286499023, "learning_rate": 1.621511595034046e-05, "loss": 3.118760681152344, "step": 23510 }, { "epoch": 0.18985655820411196, "grad_norm": 0.5846017599105835, "learning_rate": 1.621350048060225e-05, "loss": 3.2005210876464845, "step": 23520 }, { "epoch": 0.189937279529879, "grad_norm": 0.8705266714096069, "learning_rate": 1.6211885010864033e-05, "loss": 3.2974910736083984, "step": 23530 }, { "epoch": 0.19001800085564605, "grad_norm": 0.8925808668136597, "learning_rate": 1.6210269541125824e-05, "loss": 3.325872802734375, "step": 23540 }, { "epoch": 0.1900987221814131, "grad_norm": 1.3276281356811523, "learning_rate": 1.620865407138761e-05, "loss": 3.3612445831298827, "step": 23550 }, { "epoch": 0.19017944350718016, "grad_norm": 0.8914709091186523, "learning_rate": 1.6207038601649396e-05, "loss": 3.2289501190185548, "step": 23560 }, { "epoch": 0.19026016483294722, "grad_norm": 1.1164799928665161, "learning_rate": 1.620542313191118e-05, "loss": 3.3050445556640624, "step": 23570 }, { "epoch": 0.19034088615871428, "grad_norm": 0.7338820695877075, "learning_rate": 1.620380766217297e-05, "loss": 2.8112022399902346, "step": 23580 }, { "epoch": 0.19042160748448134, "grad_norm": 0.7167808413505554, "learning_rate": 1.6202192192434756e-05, "loss": 3.3968475341796873, "step": 23590 }, { "epoch": 0.19050232881024837, "grad_norm": 1.1286262273788452, "learning_rate": 1.6200576722696544e-05, "loss": 3.3115074157714846, "step": 23600 }, { "epoch": 0.19058305013601542, "grad_norm": 0.914253294467926, "learning_rate": 1.6198961252958328e-05, "loss": 2.835132598876953, "step": 23610 }, { "epoch": 0.19066377146178248, "grad_norm": 0.789267361164093, "learning_rate": 1.619734578322012e-05, "loss": 3.248060607910156, "step": 23620 }, { "epoch": 0.19074449278754954, "grad_norm": 0.9336708784103394, "learning_rate": 1.6195730313481903e-05, "loss": 3.1974010467529297, "step": 23630 }, { "epoch": 0.1908252141133166, "grad_norm": 0.734697163105011, "learning_rate": 1.619411484374369e-05, "loss": 3.644911193847656, "step": 23640 }, { "epoch": 0.19090593543908366, "grad_norm": 0.8194568157196045, "learning_rate": 1.619249937400548e-05, "loss": 3.506325531005859, "step": 23650 }, { "epoch": 0.19098665676485072, "grad_norm": 0.9390262961387634, "learning_rate": 1.6190883904267266e-05, "loss": 3.2001251220703124, "step": 23660 }, { "epoch": 0.19106737809061777, "grad_norm": 0.8892572522163391, "learning_rate": 1.618926843452905e-05, "loss": 3.045167350769043, "step": 23670 }, { "epoch": 0.1911480994163848, "grad_norm": 2.4577395915985107, "learning_rate": 1.618765296479084e-05, "loss": 3.450155258178711, "step": 23680 }, { "epoch": 0.19122882074215186, "grad_norm": 1.13399076461792, "learning_rate": 1.6186037495052626e-05, "loss": 3.3179161071777346, "step": 23690 }, { "epoch": 0.19130954206791892, "grad_norm": 1.0873539447784424, "learning_rate": 1.6184422025314414e-05, "loss": 3.388339614868164, "step": 23700 }, { "epoch": 0.19139026339368598, "grad_norm": 0.7622453570365906, "learning_rate": 1.6182806555576198e-05, "loss": 3.511822509765625, "step": 23710 }, { "epoch": 0.19147098471945304, "grad_norm": 0.6704912781715393, "learning_rate": 1.6181191085837986e-05, "loss": 3.5582157135009767, "step": 23720 }, { "epoch": 0.1915517060452201, "grad_norm": 1.1051424741744995, "learning_rate": 1.6179575616099773e-05, "loss": 3.212118148803711, "step": 23730 }, { "epoch": 0.19163242737098715, "grad_norm": 1.098064661026001, "learning_rate": 1.617796014636156e-05, "loss": 3.2373912811279295, "step": 23740 }, { "epoch": 0.19171314869675418, "grad_norm": 0.7736158967018127, "learning_rate": 1.6176344676623345e-05, "loss": 3.0454673767089844, "step": 23750 }, { "epoch": 0.19179387002252124, "grad_norm": 0.9134088158607483, "learning_rate": 1.6174729206885133e-05, "loss": 3.136491394042969, "step": 23760 }, { "epoch": 0.1918745913482883, "grad_norm": 1.7103272676467896, "learning_rate": 1.617311373714692e-05, "loss": 3.478629302978516, "step": 23770 }, { "epoch": 0.19195531267405536, "grad_norm": 4.062778949737549, "learning_rate": 1.617149826740871e-05, "loss": 4.146917343139648, "step": 23780 }, { "epoch": 0.19203603399982241, "grad_norm": 1.150097131729126, "learning_rate": 1.6169882797670493e-05, "loss": 3.09771728515625, "step": 23790 }, { "epoch": 0.19211675532558947, "grad_norm": 0.951539158821106, "learning_rate": 1.616826732793228e-05, "loss": 3.37896614074707, "step": 23800 }, { "epoch": 0.19219747665135653, "grad_norm": 0.8649698495864868, "learning_rate": 1.6166651858194068e-05, "loss": 2.9953916549682615, "step": 23810 }, { "epoch": 0.1922781979771236, "grad_norm": 0.9279341101646423, "learning_rate": 1.6165036388455856e-05, "loss": 3.4880840301513674, "step": 23820 }, { "epoch": 0.19235891930289062, "grad_norm": 0.7024520039558411, "learning_rate": 1.616342091871764e-05, "loss": 3.369676971435547, "step": 23830 }, { "epoch": 0.19243964062865768, "grad_norm": 1.0257861614227295, "learning_rate": 1.6161805448979428e-05, "loss": 3.907792663574219, "step": 23840 }, { "epoch": 0.19252036195442473, "grad_norm": 0.9019827246665955, "learning_rate": 1.6160189979241216e-05, "loss": 2.898299789428711, "step": 23850 }, { "epoch": 0.1926010832801918, "grad_norm": 0.7902045845985413, "learning_rate": 1.6158574509503003e-05, "loss": 3.81273193359375, "step": 23860 }, { "epoch": 0.19268180460595885, "grad_norm": 1.0405877828598022, "learning_rate": 1.6156959039764788e-05, "loss": 3.2782196044921874, "step": 23870 }, { "epoch": 0.1927625259317259, "grad_norm": 1.2540582418441772, "learning_rate": 1.6155343570026575e-05, "loss": 3.209376907348633, "step": 23880 }, { "epoch": 0.19284324725749297, "grad_norm": 0.8752418160438538, "learning_rate": 1.6153728100288363e-05, "loss": 3.316181945800781, "step": 23890 }, { "epoch": 0.19292396858326002, "grad_norm": 0.9095362424850464, "learning_rate": 1.615211263055015e-05, "loss": 3.5749252319335936, "step": 23900 }, { "epoch": 0.19300468990902706, "grad_norm": 0.8341363072395325, "learning_rate": 1.6150497160811935e-05, "loss": 3.202022171020508, "step": 23910 }, { "epoch": 0.1930854112347941, "grad_norm": 1.3472380638122559, "learning_rate": 1.6148881691073723e-05, "loss": 3.0854820251464843, "step": 23920 }, { "epoch": 0.19316613256056117, "grad_norm": 0.8283988833427429, "learning_rate": 1.614726622133551e-05, "loss": 3.1241470336914063, "step": 23930 }, { "epoch": 0.19324685388632823, "grad_norm": 1.465089201927185, "learning_rate": 1.6145650751597298e-05, "loss": 3.244511032104492, "step": 23940 }, { "epoch": 0.1933275752120953, "grad_norm": 1.1231766939163208, "learning_rate": 1.6144035281859082e-05, "loss": 3.1057533264160155, "step": 23950 }, { "epoch": 0.19340829653786235, "grad_norm": 0.8658732771873474, "learning_rate": 1.614241981212087e-05, "loss": 3.755840301513672, "step": 23960 }, { "epoch": 0.1934890178636294, "grad_norm": 1.0576173067092896, "learning_rate": 1.6140804342382658e-05, "loss": 3.317580795288086, "step": 23970 }, { "epoch": 0.19356973918939643, "grad_norm": 1.8567334413528442, "learning_rate": 1.6139188872644445e-05, "loss": 2.946705627441406, "step": 23980 }, { "epoch": 0.1936504605151635, "grad_norm": 0.699470579624176, "learning_rate": 1.613757340290623e-05, "loss": 3.1111764907836914, "step": 23990 }, { "epoch": 0.19373118184093055, "grad_norm": 0.7219898104667664, "learning_rate": 1.6135957933168017e-05, "loss": 3.4898277282714845, "step": 24000 }, { "epoch": 0.1938119031666976, "grad_norm": 1.169242024421692, "learning_rate": 1.6134342463429805e-05, "loss": 2.9915857315063477, "step": 24010 }, { "epoch": 0.19389262449246467, "grad_norm": 1.3105891942977905, "learning_rate": 1.6132726993691593e-05, "loss": 3.4457622528076173, "step": 24020 }, { "epoch": 0.19397334581823172, "grad_norm": 0.8844012022018433, "learning_rate": 1.6131111523953377e-05, "loss": 3.539424514770508, "step": 24030 }, { "epoch": 0.19405406714399878, "grad_norm": 1.4427428245544434, "learning_rate": 1.6129496054215165e-05, "loss": 3.260712432861328, "step": 24040 }, { "epoch": 0.19413478846976584, "grad_norm": 1.0142735242843628, "learning_rate": 1.6127880584476952e-05, "loss": 3.176499366760254, "step": 24050 }, { "epoch": 0.19421550979553287, "grad_norm": 0.7428361773490906, "learning_rate": 1.612626511473874e-05, "loss": 3.078945350646973, "step": 24060 }, { "epoch": 0.19429623112129993, "grad_norm": 0.8817248344421387, "learning_rate": 1.6124649645000528e-05, "loss": 3.0530858993530274, "step": 24070 }, { "epoch": 0.194376952447067, "grad_norm": 0.6206954717636108, "learning_rate": 1.6123034175262312e-05, "loss": 3.064038848876953, "step": 24080 }, { "epoch": 0.19445767377283404, "grad_norm": 1.610620379447937, "learning_rate": 1.61214187055241e-05, "loss": 3.6341758728027345, "step": 24090 }, { "epoch": 0.1945383950986011, "grad_norm": 0.5680055618286133, "learning_rate": 1.6119803235785888e-05, "loss": 3.2019813537597654, "step": 24100 }, { "epoch": 0.19461911642436816, "grad_norm": 0.6199904084205627, "learning_rate": 1.6118187766047675e-05, "loss": 2.763379669189453, "step": 24110 }, { "epoch": 0.19469983775013522, "grad_norm": 0.8744970560073853, "learning_rate": 1.611657229630946e-05, "loss": 2.83019962310791, "step": 24120 }, { "epoch": 0.19478055907590228, "grad_norm": 0.8182685971260071, "learning_rate": 1.6114956826571247e-05, "loss": 2.835678482055664, "step": 24130 }, { "epoch": 0.1948612804016693, "grad_norm": 1.4899362325668335, "learning_rate": 1.6113341356833035e-05, "loss": 3.332936096191406, "step": 24140 }, { "epoch": 0.19494200172743636, "grad_norm": 1.8905571699142456, "learning_rate": 1.6111725887094823e-05, "loss": 3.3743972778320312, "step": 24150 }, { "epoch": 0.19502272305320342, "grad_norm": 0.8741278648376465, "learning_rate": 1.6110110417356607e-05, "loss": 3.7543853759765624, "step": 24160 }, { "epoch": 0.19510344437897048, "grad_norm": 0.9249249696731567, "learning_rate": 1.6108494947618395e-05, "loss": 3.1388294219970705, "step": 24170 }, { "epoch": 0.19518416570473754, "grad_norm": 0.7830678224563599, "learning_rate": 1.6106879477880182e-05, "loss": 3.6693477630615234, "step": 24180 }, { "epoch": 0.1952648870305046, "grad_norm": 1.0440847873687744, "learning_rate": 1.610526400814197e-05, "loss": 3.233777618408203, "step": 24190 }, { "epoch": 0.19534560835627165, "grad_norm": 1.0480222702026367, "learning_rate": 1.6103648538403754e-05, "loss": 3.4019298553466797, "step": 24200 }, { "epoch": 0.19542632968203869, "grad_norm": 1.26025390625, "learning_rate": 1.6102033068665542e-05, "loss": 3.091984748840332, "step": 24210 }, { "epoch": 0.19550705100780574, "grad_norm": 0.9307644367218018, "learning_rate": 1.610041759892733e-05, "loss": 3.661907196044922, "step": 24220 }, { "epoch": 0.1955877723335728, "grad_norm": 0.8874568939208984, "learning_rate": 1.6098802129189117e-05, "loss": 3.0460485458374023, "step": 24230 }, { "epoch": 0.19566849365933986, "grad_norm": 0.947141170501709, "learning_rate": 1.60971866594509e-05, "loss": 3.2033382415771485, "step": 24240 }, { "epoch": 0.19574921498510692, "grad_norm": 0.9823180437088013, "learning_rate": 1.609557118971269e-05, "loss": 3.137985610961914, "step": 24250 }, { "epoch": 0.19582993631087398, "grad_norm": 1.0031545162200928, "learning_rate": 1.6093955719974477e-05, "loss": 3.310595703125, "step": 24260 }, { "epoch": 0.19591065763664103, "grad_norm": 1.266871452331543, "learning_rate": 1.6092340250236265e-05, "loss": 3.1892309188842773, "step": 24270 }, { "epoch": 0.1959913789624081, "grad_norm": 0.6336318850517273, "learning_rate": 1.609072478049805e-05, "loss": 2.9501665115356444, "step": 24280 }, { "epoch": 0.19607210028817512, "grad_norm": 1.1994023323059082, "learning_rate": 1.6089109310759837e-05, "loss": 3.023185157775879, "step": 24290 }, { "epoch": 0.19615282161394218, "grad_norm": 0.8686439990997314, "learning_rate": 1.6087493841021624e-05, "loss": 3.4324462890625, "step": 24300 }, { "epoch": 0.19623354293970924, "grad_norm": 0.8249073624610901, "learning_rate": 1.6085878371283412e-05, "loss": 3.143278121948242, "step": 24310 }, { "epoch": 0.1963142642654763, "grad_norm": 1.0410058498382568, "learning_rate": 1.6084262901545196e-05, "loss": 3.1772409439086915, "step": 24320 }, { "epoch": 0.19639498559124335, "grad_norm": 0.84428471326828, "learning_rate": 1.6082647431806988e-05, "loss": 3.526978302001953, "step": 24330 }, { "epoch": 0.1964757069170104, "grad_norm": 0.9356173872947693, "learning_rate": 1.6081031962068772e-05, "loss": 3.2589271545410154, "step": 24340 }, { "epoch": 0.19655642824277747, "grad_norm": 0.8176267147064209, "learning_rate": 1.607941649233056e-05, "loss": 2.993446922302246, "step": 24350 }, { "epoch": 0.1966371495685445, "grad_norm": 1.169244647026062, "learning_rate": 1.6077801022592344e-05, "loss": 2.807443618774414, "step": 24360 }, { "epoch": 0.19671787089431156, "grad_norm": 0.8716306090354919, "learning_rate": 1.6076185552854135e-05, "loss": 3.3796905517578124, "step": 24370 }, { "epoch": 0.19679859222007862, "grad_norm": 1.0365612506866455, "learning_rate": 1.607457008311592e-05, "loss": 3.0476926803588866, "step": 24380 }, { "epoch": 0.19687931354584567, "grad_norm": 1.1329491138458252, "learning_rate": 1.6072954613377707e-05, "loss": 3.557606506347656, "step": 24390 }, { "epoch": 0.19696003487161273, "grad_norm": 1.0917507410049438, "learning_rate": 1.607133914363949e-05, "loss": 3.2481834411621096, "step": 24400 }, { "epoch": 0.1970407561973798, "grad_norm": 1.5881402492523193, "learning_rate": 1.6069723673901282e-05, "loss": 3.07114200592041, "step": 24410 }, { "epoch": 0.19712147752314685, "grad_norm": 0.5880495309829712, "learning_rate": 1.6068108204163067e-05, "loss": 2.7820322036743166, "step": 24420 }, { "epoch": 0.1972021988489139, "grad_norm": 0.6369416117668152, "learning_rate": 1.6066492734424854e-05, "loss": 3.076784133911133, "step": 24430 }, { "epoch": 0.19728292017468094, "grad_norm": 0.8109725117683411, "learning_rate": 1.606487726468664e-05, "loss": 2.7619277954101564, "step": 24440 }, { "epoch": 0.197363641500448, "grad_norm": 1.04163658618927, "learning_rate": 1.606326179494843e-05, "loss": 3.3215457916259767, "step": 24450 }, { "epoch": 0.19744436282621505, "grad_norm": 0.9278497099876404, "learning_rate": 1.6061646325210214e-05, "loss": 3.1819482803344727, "step": 24460 }, { "epoch": 0.1975250841519821, "grad_norm": 1.1107007265090942, "learning_rate": 1.6060030855472e-05, "loss": 3.3118366241455077, "step": 24470 }, { "epoch": 0.19760580547774917, "grad_norm": 1.3523879051208496, "learning_rate": 1.6058415385733786e-05, "loss": 3.7181365966796873, "step": 24480 }, { "epoch": 0.19768652680351623, "grad_norm": 1.1287848949432373, "learning_rate": 1.6056799915995577e-05, "loss": 3.221536636352539, "step": 24490 }, { "epoch": 0.19776724812928329, "grad_norm": 1.0838234424591064, "learning_rate": 1.605518444625736e-05, "loss": 3.5938880920410154, "step": 24500 }, { "epoch": 0.19784796945505034, "grad_norm": 0.7721467614173889, "learning_rate": 1.605356897651915e-05, "loss": 3.2756542205810546, "step": 24510 }, { "epoch": 0.19792869078081737, "grad_norm": 1.315023422241211, "learning_rate": 1.6051953506780937e-05, "loss": 3.558721160888672, "step": 24520 }, { "epoch": 0.19800941210658443, "grad_norm": 1.5676518678665161, "learning_rate": 1.6050338037042724e-05, "loss": 3.1033452987670898, "step": 24530 }, { "epoch": 0.1980901334323515, "grad_norm": 0.5535191297531128, "learning_rate": 1.604872256730451e-05, "loss": 3.1410039901733398, "step": 24540 }, { "epoch": 0.19817085475811855, "grad_norm": 1.8656305074691772, "learning_rate": 1.6047107097566296e-05, "loss": 3.1839332580566406, "step": 24550 }, { "epoch": 0.1982515760838856, "grad_norm": 1.5933727025985718, "learning_rate": 1.6045491627828084e-05, "loss": 2.93743782043457, "step": 24560 }, { "epoch": 0.19833229740965266, "grad_norm": 0.9133821725845337, "learning_rate": 1.6043876158089872e-05, "loss": 3.2873085021972654, "step": 24570 }, { "epoch": 0.19841301873541972, "grad_norm": 1.011873483657837, "learning_rate": 1.6042260688351656e-05, "loss": 3.0474365234375, "step": 24580 }, { "epoch": 0.19849374006118675, "grad_norm": 1.2653502225875854, "learning_rate": 1.6040645218613444e-05, "loss": 3.059996223449707, "step": 24590 }, { "epoch": 0.1985744613869538, "grad_norm": 0.8000967502593994, "learning_rate": 1.603902974887523e-05, "loss": 3.438973236083984, "step": 24600 }, { "epoch": 0.19865518271272087, "grad_norm": 1.297107458114624, "learning_rate": 1.603741427913702e-05, "loss": 2.8299211502075194, "step": 24610 }, { "epoch": 0.19873590403848793, "grad_norm": 1.0078517198562622, "learning_rate": 1.6035798809398804e-05, "loss": 2.8579469680786134, "step": 24620 }, { "epoch": 0.19881662536425498, "grad_norm": 0.9385775923728943, "learning_rate": 1.603418333966059e-05, "loss": 2.9813180923461915, "step": 24630 }, { "epoch": 0.19889734669002204, "grad_norm": 0.7712946534156799, "learning_rate": 1.603256786992238e-05, "loss": 3.246923828125, "step": 24640 }, { "epoch": 0.1989780680157891, "grad_norm": 1.015197992324829, "learning_rate": 1.6030952400184167e-05, "loss": 3.3987842559814454, "step": 24650 }, { "epoch": 0.19905878934155616, "grad_norm": 1.2074414491653442, "learning_rate": 1.602933693044595e-05, "loss": 3.731204605102539, "step": 24660 }, { "epoch": 0.1991395106673232, "grad_norm": 2.143280506134033, "learning_rate": 1.602772146070774e-05, "loss": 3.0146636962890625, "step": 24670 }, { "epoch": 0.19922023199309025, "grad_norm": 1.0743279457092285, "learning_rate": 1.6026105990969526e-05, "loss": 3.3200801849365233, "step": 24680 }, { "epoch": 0.1993009533188573, "grad_norm": 0.7800185084342957, "learning_rate": 1.6024490521231314e-05, "loss": 3.101688194274902, "step": 24690 }, { "epoch": 0.19938167464462436, "grad_norm": 0.648487389087677, "learning_rate": 1.6022875051493098e-05, "loss": 3.2098560333251953, "step": 24700 }, { "epoch": 0.19946239597039142, "grad_norm": 0.9826608300209045, "learning_rate": 1.6021259581754886e-05, "loss": 2.5999725341796873, "step": 24710 }, { "epoch": 0.19954311729615848, "grad_norm": 1.3039942979812622, "learning_rate": 1.6019644112016674e-05, "loss": 3.9026241302490234, "step": 24720 }, { "epoch": 0.19962383862192554, "grad_norm": 1.0802559852600098, "learning_rate": 1.601802864227846e-05, "loss": 3.475798797607422, "step": 24730 }, { "epoch": 0.1997045599476926, "grad_norm": 1.2752443552017212, "learning_rate": 1.6016413172540246e-05, "loss": 3.120893096923828, "step": 24740 }, { "epoch": 0.19978528127345963, "grad_norm": 1.2756257057189941, "learning_rate": 1.6014797702802033e-05, "loss": 3.299129104614258, "step": 24750 }, { "epoch": 0.19986600259922668, "grad_norm": 1.121289610862732, "learning_rate": 1.601318223306382e-05, "loss": 3.015001106262207, "step": 24760 }, { "epoch": 0.19994672392499374, "grad_norm": 0.7539690732955933, "learning_rate": 1.601156676332561e-05, "loss": 2.8930540084838867, "step": 24770 }, { "epoch": 0.2000274452507608, "grad_norm": 0.6925106048583984, "learning_rate": 1.6009951293587393e-05, "loss": 3.290948486328125, "step": 24780 }, { "epoch": 0.20010816657652786, "grad_norm": 1.0113790035247803, "learning_rate": 1.600833582384918e-05, "loss": 3.6819114685058594, "step": 24790 }, { "epoch": 0.20018888790229492, "grad_norm": 0.965345025062561, "learning_rate": 1.600672035411097e-05, "loss": 3.2147640228271483, "step": 24800 }, { "epoch": 0.20026960922806197, "grad_norm": 0.7646992802619934, "learning_rate": 1.6005104884372756e-05, "loss": 3.194970893859863, "step": 24810 }, { "epoch": 0.200350330553829, "grad_norm": 1.0814145803451538, "learning_rate": 1.600348941463454e-05, "loss": 3.3229747772216798, "step": 24820 }, { "epoch": 0.20043105187959606, "grad_norm": 0.7490031123161316, "learning_rate": 1.6001873944896328e-05, "loss": 3.548350524902344, "step": 24830 }, { "epoch": 0.20051177320536312, "grad_norm": 2.7487165927886963, "learning_rate": 1.6000258475158116e-05, "loss": 3.2199428558349608, "step": 24840 }, { "epoch": 0.20059249453113018, "grad_norm": 0.5729919075965881, "learning_rate": 1.5998643005419904e-05, "loss": 2.946512222290039, "step": 24850 }, { "epoch": 0.20067321585689724, "grad_norm": 0.8843357563018799, "learning_rate": 1.5997027535681688e-05, "loss": 2.9854780197143556, "step": 24860 }, { "epoch": 0.2007539371826643, "grad_norm": 0.8410912752151489, "learning_rate": 1.5995412065943476e-05, "loss": 3.1144973754882814, "step": 24870 }, { "epoch": 0.20083465850843135, "grad_norm": 0.6037425994873047, "learning_rate": 1.5993796596205263e-05, "loss": 3.2004066467285157, "step": 24880 }, { "epoch": 0.2009153798341984, "grad_norm": 0.7565062642097473, "learning_rate": 1.599218112646705e-05, "loss": 3.18981819152832, "step": 24890 }, { "epoch": 0.20099610115996544, "grad_norm": 0.7127270102500916, "learning_rate": 1.5990565656728835e-05, "loss": 3.4144920349121093, "step": 24900 }, { "epoch": 0.2010768224857325, "grad_norm": 0.8104019165039062, "learning_rate": 1.5988950186990623e-05, "loss": 3.1720224380493165, "step": 24910 }, { "epoch": 0.20115754381149956, "grad_norm": 0.6914398074150085, "learning_rate": 1.598733471725241e-05, "loss": 2.990903854370117, "step": 24920 }, { "epoch": 0.20123826513726661, "grad_norm": 2.838608503341675, "learning_rate": 1.5985719247514198e-05, "loss": 3.684219741821289, "step": 24930 }, { "epoch": 0.20131898646303367, "grad_norm": 0.8658871650695801, "learning_rate": 1.5984103777775983e-05, "loss": 2.7807500839233397, "step": 24940 }, { "epoch": 0.20139970778880073, "grad_norm": 1.4521182775497437, "learning_rate": 1.598248830803777e-05, "loss": 3.096397590637207, "step": 24950 }, { "epoch": 0.2014804291145678, "grad_norm": 1.4909168481826782, "learning_rate": 1.5980872838299558e-05, "loss": 3.522649383544922, "step": 24960 }, { "epoch": 0.20156115044033482, "grad_norm": 1.0035933256149292, "learning_rate": 1.5979257368561346e-05, "loss": 3.151556205749512, "step": 24970 }, { "epoch": 0.20164187176610188, "grad_norm": 0.7385639548301697, "learning_rate": 1.597764189882313e-05, "loss": 3.1133514404296876, "step": 24980 }, { "epoch": 0.20172259309186893, "grad_norm": 1.1541099548339844, "learning_rate": 1.5976026429084918e-05, "loss": 3.3214527130126954, "step": 24990 }, { "epoch": 0.201803314417636, "grad_norm": 1.3742246627807617, "learning_rate": 1.5974410959346705e-05, "loss": 3.498880386352539, "step": 25000 }, { "epoch": 0.20188403574340305, "grad_norm": 0.8103464841842651, "learning_rate": 1.5972795489608493e-05, "loss": 3.3594429016113283, "step": 25010 }, { "epoch": 0.2019647570691701, "grad_norm": 3.646207571029663, "learning_rate": 1.5971180019870277e-05, "loss": 3.3981853485107423, "step": 25020 }, { "epoch": 0.20204547839493717, "grad_norm": 0.9887511730194092, "learning_rate": 1.5969564550132065e-05, "loss": 3.314623260498047, "step": 25030 }, { "epoch": 0.20212619972070422, "grad_norm": 0.8028542995452881, "learning_rate": 1.5967949080393853e-05, "loss": 3.1919750213623046, "step": 25040 }, { "epoch": 0.20220692104647126, "grad_norm": 1.1635522842407227, "learning_rate": 1.596633361065564e-05, "loss": 3.2287879943847657, "step": 25050 }, { "epoch": 0.2022876423722383, "grad_norm": 0.9818128943443298, "learning_rate": 1.5964718140917425e-05, "loss": 3.1524890899658202, "step": 25060 }, { "epoch": 0.20236836369800537, "grad_norm": 0.7281699776649475, "learning_rate": 1.5963102671179212e-05, "loss": 3.0977954864501953, "step": 25070 }, { "epoch": 0.20244908502377243, "grad_norm": 1.2577022314071655, "learning_rate": 1.5961487201441e-05, "loss": 3.4092262268066404, "step": 25080 }, { "epoch": 0.2025298063495395, "grad_norm": 1.0577138662338257, "learning_rate": 1.5959871731702788e-05, "loss": 2.9587051391601564, "step": 25090 }, { "epoch": 0.20261052767530655, "grad_norm": 1.421794056892395, "learning_rate": 1.5958256261964572e-05, "loss": 2.7787330627441404, "step": 25100 }, { "epoch": 0.2026912490010736, "grad_norm": 0.961014449596405, "learning_rate": 1.595664079222636e-05, "loss": 3.206330108642578, "step": 25110 }, { "epoch": 0.20277197032684066, "grad_norm": 1.0494061708450317, "learning_rate": 1.5955025322488147e-05, "loss": 3.0381513595581056, "step": 25120 }, { "epoch": 0.2028526916526077, "grad_norm": 0.9616536498069763, "learning_rate": 1.5953409852749935e-05, "loss": 2.7232580184936523, "step": 25130 }, { "epoch": 0.20293341297837475, "grad_norm": 0.9054750800132751, "learning_rate": 1.595179438301172e-05, "loss": 3.077963447570801, "step": 25140 }, { "epoch": 0.2030141343041418, "grad_norm": 1.1101468801498413, "learning_rate": 1.5950178913273507e-05, "loss": 2.704095458984375, "step": 25150 }, { "epoch": 0.20309485562990887, "grad_norm": 0.8670735359191895, "learning_rate": 1.5948563443535295e-05, "loss": 3.312397766113281, "step": 25160 }, { "epoch": 0.20317557695567592, "grad_norm": 1.116310715675354, "learning_rate": 1.5946947973797083e-05, "loss": 3.0565786361694336, "step": 25170 }, { "epoch": 0.20325629828144298, "grad_norm": 0.7981580495834351, "learning_rate": 1.5945332504058867e-05, "loss": 3.0323822021484377, "step": 25180 }, { "epoch": 0.20333701960721004, "grad_norm": 0.8985583782196045, "learning_rate": 1.5943717034320655e-05, "loss": 3.464037322998047, "step": 25190 }, { "epoch": 0.20341774093297707, "grad_norm": 0.9016704559326172, "learning_rate": 1.5942101564582442e-05, "loss": 3.3390167236328123, "step": 25200 }, { "epoch": 0.20349846225874413, "grad_norm": 0.7956854701042175, "learning_rate": 1.594048609484423e-05, "loss": 3.1176103591918944, "step": 25210 }, { "epoch": 0.2035791835845112, "grad_norm": 0.7748594284057617, "learning_rate": 1.5938870625106014e-05, "loss": 2.6201765060424806, "step": 25220 }, { "epoch": 0.20365990491027824, "grad_norm": 1.1096409559249878, "learning_rate": 1.5937255155367802e-05, "loss": 3.4124835968017577, "step": 25230 }, { "epoch": 0.2037406262360453, "grad_norm": 1.384230613708496, "learning_rate": 1.593563968562959e-05, "loss": 3.032024383544922, "step": 25240 }, { "epoch": 0.20382134756181236, "grad_norm": 1.2393274307250977, "learning_rate": 1.5934024215891377e-05, "loss": 3.2966331481933593, "step": 25250 }, { "epoch": 0.20390206888757942, "grad_norm": 0.9630767107009888, "learning_rate": 1.593240874615316e-05, "loss": 3.017056465148926, "step": 25260 }, { "epoch": 0.20398279021334648, "grad_norm": 0.6865170001983643, "learning_rate": 1.593079327641495e-05, "loss": 2.9724136352539063, "step": 25270 }, { "epoch": 0.2040635115391135, "grad_norm": 1.0644761323928833, "learning_rate": 1.5929177806676737e-05, "loss": 3.6435623168945312, "step": 25280 }, { "epoch": 0.20414423286488056, "grad_norm": 0.8343404531478882, "learning_rate": 1.5927562336938525e-05, "loss": 3.1340263366699217, "step": 25290 }, { "epoch": 0.20422495419064762, "grad_norm": 1.427808165550232, "learning_rate": 1.5925946867200312e-05, "loss": 3.0384286880493163, "step": 25300 }, { "epoch": 0.20430567551641468, "grad_norm": 0.8153987526893616, "learning_rate": 1.5924331397462097e-05, "loss": 2.934117889404297, "step": 25310 }, { "epoch": 0.20438639684218174, "grad_norm": 0.9132798314094543, "learning_rate": 1.5922715927723888e-05, "loss": 2.947539710998535, "step": 25320 }, { "epoch": 0.2044671181679488, "grad_norm": 0.7475882172584534, "learning_rate": 1.5921100457985672e-05, "loss": 3.287738800048828, "step": 25330 }, { "epoch": 0.20454783949371586, "grad_norm": 0.957861602306366, "learning_rate": 1.591948498824746e-05, "loss": 2.9169637680053713, "step": 25340 }, { "epoch": 0.20462856081948289, "grad_norm": 0.8029472827911377, "learning_rate": 1.5917869518509244e-05, "loss": 3.1561504364013673, "step": 25350 }, { "epoch": 0.20470928214524994, "grad_norm": 0.9404305815696716, "learning_rate": 1.5916254048771035e-05, "loss": 3.624939727783203, "step": 25360 }, { "epoch": 0.204790003471017, "grad_norm": 0.7375535368919373, "learning_rate": 1.591463857903282e-05, "loss": 2.943276596069336, "step": 25370 }, { "epoch": 0.20487072479678406, "grad_norm": 0.9646531343460083, "learning_rate": 1.5913023109294607e-05, "loss": 3.4195178985595702, "step": 25380 }, { "epoch": 0.20495144612255112, "grad_norm": 0.8914561867713928, "learning_rate": 1.5911407639556395e-05, "loss": 3.1869630813598633, "step": 25390 }, { "epoch": 0.20503216744831818, "grad_norm": 1.6839488744735718, "learning_rate": 1.5909792169818183e-05, "loss": 3.171493339538574, "step": 25400 }, { "epoch": 0.20511288877408523, "grad_norm": 1.0177078247070312, "learning_rate": 1.5908176700079967e-05, "loss": 2.8634241104125975, "step": 25410 }, { "epoch": 0.2051936100998523, "grad_norm": 1.0813193321228027, "learning_rate": 1.5906561230341755e-05, "loss": 2.8770883560180662, "step": 25420 }, { "epoch": 0.20527433142561932, "grad_norm": 0.7974318861961365, "learning_rate": 1.5904945760603542e-05, "loss": 2.9612247467041017, "step": 25430 }, { "epoch": 0.20535505275138638, "grad_norm": 1.3522495031356812, "learning_rate": 1.590333029086533e-05, "loss": 3.477069091796875, "step": 25440 }, { "epoch": 0.20543577407715344, "grad_norm": 0.9264211058616638, "learning_rate": 1.5901714821127114e-05, "loss": 3.658842849731445, "step": 25450 }, { "epoch": 0.2055164954029205, "grad_norm": 1.0020591020584106, "learning_rate": 1.5900099351388902e-05, "loss": 3.4423603057861327, "step": 25460 }, { "epoch": 0.20559721672868755, "grad_norm": 1.3811471462249756, "learning_rate": 1.589848388165069e-05, "loss": 3.280660629272461, "step": 25470 }, { "epoch": 0.2056779380544546, "grad_norm": 0.999925434589386, "learning_rate": 1.5896868411912477e-05, "loss": 3.753844451904297, "step": 25480 }, { "epoch": 0.20575865938022167, "grad_norm": 1.0075902938842773, "learning_rate": 1.589525294217426e-05, "loss": 3.020255470275879, "step": 25490 }, { "epoch": 0.20583938070598873, "grad_norm": 0.7397570610046387, "learning_rate": 1.589363747243605e-05, "loss": 3.3795719146728516, "step": 25500 }, { "epoch": 0.20592010203175576, "grad_norm": 0.7619324326515198, "learning_rate": 1.5892022002697837e-05, "loss": 3.494301605224609, "step": 25510 }, { "epoch": 0.20600082335752282, "grad_norm": 0.8409522175788879, "learning_rate": 1.5890406532959625e-05, "loss": 2.903759002685547, "step": 25520 }, { "epoch": 0.20608154468328987, "grad_norm": 0.791322648525238, "learning_rate": 1.588879106322141e-05, "loss": 3.1131553649902344, "step": 25530 }, { "epoch": 0.20616226600905693, "grad_norm": 1.0901691913604736, "learning_rate": 1.5887175593483197e-05, "loss": 3.249343490600586, "step": 25540 }, { "epoch": 0.206242987334824, "grad_norm": 1.2401378154754639, "learning_rate": 1.5885560123744984e-05, "loss": 2.751068687438965, "step": 25550 }, { "epoch": 0.20632370866059105, "grad_norm": 1.33262038230896, "learning_rate": 1.5883944654006772e-05, "loss": 3.3434829711914062, "step": 25560 }, { "epoch": 0.2064044299863581, "grad_norm": 1.2889560461044312, "learning_rate": 1.5882329184268556e-05, "loss": 2.985773277282715, "step": 25570 }, { "epoch": 0.20648515131212514, "grad_norm": 1.730973482131958, "learning_rate": 1.5880713714530344e-05, "loss": 3.42938117980957, "step": 25580 }, { "epoch": 0.2065658726378922, "grad_norm": 0.8943193554878235, "learning_rate": 1.5879098244792132e-05, "loss": 3.0223392486572265, "step": 25590 }, { "epoch": 0.20664659396365925, "grad_norm": 0.7864869832992554, "learning_rate": 1.587748277505392e-05, "loss": 3.2266632080078126, "step": 25600 }, { "epoch": 0.2067273152894263, "grad_norm": 0.9531352519989014, "learning_rate": 1.5875867305315704e-05, "loss": 3.1373682022094727, "step": 25610 }, { "epoch": 0.20680803661519337, "grad_norm": 1.092953085899353, "learning_rate": 1.587425183557749e-05, "loss": 3.2173225402832033, "step": 25620 }, { "epoch": 0.20688875794096043, "grad_norm": 0.6566843390464783, "learning_rate": 1.587263636583928e-05, "loss": 2.9215137481689455, "step": 25630 }, { "epoch": 0.20696947926672749, "grad_norm": 1.1841331720352173, "learning_rate": 1.5871020896101067e-05, "loss": 3.7202877044677733, "step": 25640 }, { "epoch": 0.20705020059249454, "grad_norm": 1.017325758934021, "learning_rate": 1.586940542636285e-05, "loss": 3.565715789794922, "step": 25650 }, { "epoch": 0.20713092191826157, "grad_norm": 0.7746817469596863, "learning_rate": 1.586778995662464e-05, "loss": 2.780234718322754, "step": 25660 }, { "epoch": 0.20721164324402863, "grad_norm": 1.2108510732650757, "learning_rate": 1.5866174486886427e-05, "loss": 3.573355865478516, "step": 25670 }, { "epoch": 0.2072923645697957, "grad_norm": 1.3278729915618896, "learning_rate": 1.5864559017148214e-05, "loss": 2.9939458847045897, "step": 25680 }, { "epoch": 0.20737308589556275, "grad_norm": 0.5896295309066772, "learning_rate": 1.586294354741e-05, "loss": 3.354977035522461, "step": 25690 }, { "epoch": 0.2074538072213298, "grad_norm": 0.7530423402786255, "learning_rate": 1.5861328077671786e-05, "loss": 2.8957069396972654, "step": 25700 }, { "epoch": 0.20753452854709686, "grad_norm": 0.9550365805625916, "learning_rate": 1.5859712607933574e-05, "loss": 2.6761213302612306, "step": 25710 }, { "epoch": 0.20761524987286392, "grad_norm": 1.1886792182922363, "learning_rate": 1.585809713819536e-05, "loss": 2.732614517211914, "step": 25720 }, { "epoch": 0.20769597119863098, "grad_norm": 0.9445701837539673, "learning_rate": 1.5856481668457146e-05, "loss": 3.001656723022461, "step": 25730 }, { "epoch": 0.207776692524398, "grad_norm": 1.1729289293289185, "learning_rate": 1.5854866198718934e-05, "loss": 3.2716114044189455, "step": 25740 }, { "epoch": 0.20785741385016507, "grad_norm": 0.9496493935585022, "learning_rate": 1.585325072898072e-05, "loss": 2.883428955078125, "step": 25750 }, { "epoch": 0.20793813517593213, "grad_norm": 0.8696274757385254, "learning_rate": 1.585163525924251e-05, "loss": 2.8452924728393554, "step": 25760 }, { "epoch": 0.20801885650169918, "grad_norm": 0.8992044925689697, "learning_rate": 1.5850019789504293e-05, "loss": 3.033374214172363, "step": 25770 }, { "epoch": 0.20809957782746624, "grad_norm": 0.7037824988365173, "learning_rate": 1.584840431976608e-05, "loss": 3.218989944458008, "step": 25780 }, { "epoch": 0.2081802991532333, "grad_norm": 0.7233229875564575, "learning_rate": 1.584678885002787e-05, "loss": 3.537976837158203, "step": 25790 }, { "epoch": 0.20826102047900036, "grad_norm": 1.0740947723388672, "learning_rate": 1.5845173380289656e-05, "loss": 3.08304500579834, "step": 25800 }, { "epoch": 0.2083417418047674, "grad_norm": 1.7481342554092407, "learning_rate": 1.584355791055144e-05, "loss": 3.151946449279785, "step": 25810 }, { "epoch": 0.20842246313053445, "grad_norm": 1.2110041379928589, "learning_rate": 1.584194244081323e-05, "loss": 3.0320131301879885, "step": 25820 }, { "epoch": 0.2085031844563015, "grad_norm": 0.8054879903793335, "learning_rate": 1.5840326971075016e-05, "loss": 3.729541778564453, "step": 25830 }, { "epoch": 0.20858390578206856, "grad_norm": 0.9868515729904175, "learning_rate": 1.5838711501336804e-05, "loss": 3.2566249847412108, "step": 25840 }, { "epoch": 0.20866462710783562, "grad_norm": 1.351500391960144, "learning_rate": 1.5837096031598588e-05, "loss": 3.4147975921630858, "step": 25850 }, { "epoch": 0.20874534843360268, "grad_norm": 0.9388991594314575, "learning_rate": 1.5835480561860376e-05, "loss": 3.19661865234375, "step": 25860 }, { "epoch": 0.20882606975936974, "grad_norm": 0.793785035610199, "learning_rate": 1.5833865092122163e-05, "loss": 3.09161319732666, "step": 25870 }, { "epoch": 0.2089067910851368, "grad_norm": 0.9946516156196594, "learning_rate": 1.583224962238395e-05, "loss": 3.1995006561279298, "step": 25880 }, { "epoch": 0.20898751241090383, "grad_norm": 1.0860395431518555, "learning_rate": 1.5830634152645735e-05, "loss": 3.18900146484375, "step": 25890 }, { "epoch": 0.20906823373667088, "grad_norm": 0.6852635145187378, "learning_rate": 1.5829018682907523e-05, "loss": 3.1349069595336916, "step": 25900 }, { "epoch": 0.20914895506243794, "grad_norm": 1.2002617120742798, "learning_rate": 1.582740321316931e-05, "loss": 3.4296489715576173, "step": 25910 }, { "epoch": 0.209229676388205, "grad_norm": 0.9863008260726929, "learning_rate": 1.58257877434311e-05, "loss": 2.982265853881836, "step": 25920 }, { "epoch": 0.20931039771397206, "grad_norm": 0.8024097681045532, "learning_rate": 1.5824172273692883e-05, "loss": 3.0599573135375975, "step": 25930 }, { "epoch": 0.20939111903973912, "grad_norm": 0.9413595199584961, "learning_rate": 1.582255680395467e-05, "loss": 3.4465076446533205, "step": 25940 }, { "epoch": 0.20947184036550617, "grad_norm": 1.8361793756484985, "learning_rate": 1.5820941334216458e-05, "loss": 2.7313507080078123, "step": 25950 }, { "epoch": 0.2095525616912732, "grad_norm": 1.0450862646102905, "learning_rate": 1.5819325864478246e-05, "loss": 3.19061393737793, "step": 25960 }, { "epoch": 0.20963328301704026, "grad_norm": 1.6795330047607422, "learning_rate": 1.581771039474003e-05, "loss": 2.865907669067383, "step": 25970 }, { "epoch": 0.20971400434280732, "grad_norm": 1.3696290254592896, "learning_rate": 1.5816094925001818e-05, "loss": 3.3116153717041015, "step": 25980 }, { "epoch": 0.20979472566857438, "grad_norm": 0.7689319849014282, "learning_rate": 1.5814479455263606e-05, "loss": 2.8973875045776367, "step": 25990 }, { "epoch": 0.20987544699434144, "grad_norm": 1.00851309299469, "learning_rate": 1.5812863985525393e-05, "loss": 2.9396617889404295, "step": 26000 }, { "epoch": 0.2099561683201085, "grad_norm": 0.7299342751502991, "learning_rate": 1.5811248515787178e-05, "loss": 3.2275157928466798, "step": 26010 }, { "epoch": 0.21003688964587555, "grad_norm": 0.9360201358795166, "learning_rate": 1.5809633046048965e-05, "loss": 3.0694507598876952, "step": 26020 }, { "epoch": 0.2101176109716426, "grad_norm": 0.7817157506942749, "learning_rate": 1.5808017576310753e-05, "loss": 2.9364797592163088, "step": 26030 }, { "epoch": 0.21019833229740964, "grad_norm": 1.1887212991714478, "learning_rate": 1.580640210657254e-05, "loss": 3.3010311126708984, "step": 26040 }, { "epoch": 0.2102790536231767, "grad_norm": 1.5113308429718018, "learning_rate": 1.5804786636834325e-05, "loss": 2.9874666213989256, "step": 26050 }, { "epoch": 0.21035977494894376, "grad_norm": 1.0559662580490112, "learning_rate": 1.5803171167096113e-05, "loss": 3.683145523071289, "step": 26060 }, { "epoch": 0.21044049627471081, "grad_norm": 1.2156741619110107, "learning_rate": 1.58015556973579e-05, "loss": 3.5717239379882812, "step": 26070 }, { "epoch": 0.21052121760047787, "grad_norm": 0.9306620955467224, "learning_rate": 1.5799940227619688e-05, "loss": 3.038975715637207, "step": 26080 }, { "epoch": 0.21060193892624493, "grad_norm": 0.6447098851203918, "learning_rate": 1.5798324757881472e-05, "loss": 3.073008918762207, "step": 26090 }, { "epoch": 0.210682660252012, "grad_norm": 0.577608048915863, "learning_rate": 1.579670928814326e-05, "loss": 3.3322494506835936, "step": 26100 }, { "epoch": 0.21076338157777905, "grad_norm": 0.7800047397613525, "learning_rate": 1.5795093818405048e-05, "loss": 3.356062316894531, "step": 26110 }, { "epoch": 0.21084410290354608, "grad_norm": 0.8641521334648132, "learning_rate": 1.5793478348666835e-05, "loss": 3.241907501220703, "step": 26120 }, { "epoch": 0.21092482422931313, "grad_norm": 0.7437555193901062, "learning_rate": 1.579186287892862e-05, "loss": 3.075681686401367, "step": 26130 }, { "epoch": 0.2110055455550802, "grad_norm": 1.489762783050537, "learning_rate": 1.5790247409190407e-05, "loss": 3.253290557861328, "step": 26140 }, { "epoch": 0.21108626688084725, "grad_norm": 1.1796393394470215, "learning_rate": 1.5788631939452195e-05, "loss": 3.204976272583008, "step": 26150 }, { "epoch": 0.2111669882066143, "grad_norm": 1.3278203010559082, "learning_rate": 1.5787016469713983e-05, "loss": 3.0657276153564452, "step": 26160 }, { "epoch": 0.21124770953238137, "grad_norm": 0.748521089553833, "learning_rate": 1.5785400999975767e-05, "loss": 3.01267032623291, "step": 26170 }, { "epoch": 0.21132843085814842, "grad_norm": 1.300880789756775, "learning_rate": 1.5783785530237555e-05, "loss": 3.2329971313476564, "step": 26180 }, { "epoch": 0.21140915218391546, "grad_norm": 0.7856003642082214, "learning_rate": 1.5782170060499343e-05, "loss": 2.9377716064453123, "step": 26190 }, { "epoch": 0.2114898735096825, "grad_norm": 2.8226358890533447, "learning_rate": 1.578055459076113e-05, "loss": 2.9900325775146483, "step": 26200 }, { "epoch": 0.21157059483544957, "grad_norm": 1.1386017799377441, "learning_rate": 1.5778939121022915e-05, "loss": 3.122510528564453, "step": 26210 }, { "epoch": 0.21165131616121663, "grad_norm": 1.2200472354888916, "learning_rate": 1.5777323651284702e-05, "loss": 2.905192756652832, "step": 26220 }, { "epoch": 0.2117320374869837, "grad_norm": 0.5775496959686279, "learning_rate": 1.577570818154649e-05, "loss": 3.091299819946289, "step": 26230 }, { "epoch": 0.21181275881275075, "grad_norm": 0.9926269054412842, "learning_rate": 1.5774092711808278e-05, "loss": 3.2268871307373046, "step": 26240 }, { "epoch": 0.2118934801385178, "grad_norm": 1.0048643350601196, "learning_rate": 1.5772477242070062e-05, "loss": 2.9279912948608398, "step": 26250 }, { "epoch": 0.21197420146428486, "grad_norm": 1.032799482345581, "learning_rate": 1.5770861772331853e-05, "loss": 3.3144718170166017, "step": 26260 }, { "epoch": 0.2120549227900519, "grad_norm": 0.7884994745254517, "learning_rate": 1.5769246302593637e-05, "loss": 3.230487823486328, "step": 26270 }, { "epoch": 0.21213564411581895, "grad_norm": 1.4074883460998535, "learning_rate": 1.5767630832855425e-05, "loss": 2.735685348510742, "step": 26280 }, { "epoch": 0.212216365441586, "grad_norm": 0.8939530253410339, "learning_rate": 1.576601536311721e-05, "loss": 2.925967979431152, "step": 26290 }, { "epoch": 0.21229708676735307, "grad_norm": 1.2014089822769165, "learning_rate": 1.5764399893379e-05, "loss": 3.1376861572265624, "step": 26300 }, { "epoch": 0.21237780809312012, "grad_norm": 1.0379481315612793, "learning_rate": 1.5762784423640785e-05, "loss": 3.2912677764892577, "step": 26310 }, { "epoch": 0.21245852941888718, "grad_norm": 1.7745740413665771, "learning_rate": 1.5761168953902572e-05, "loss": 3.424519729614258, "step": 26320 }, { "epoch": 0.21253925074465424, "grad_norm": 0.8695784211158752, "learning_rate": 1.5759553484164357e-05, "loss": 3.0050697326660156, "step": 26330 }, { "epoch": 0.2126199720704213, "grad_norm": 0.7177550792694092, "learning_rate": 1.5757938014426148e-05, "loss": 3.0816957473754885, "step": 26340 }, { "epoch": 0.21270069339618833, "grad_norm": 0.9531099796295166, "learning_rate": 1.5756322544687932e-05, "loss": 3.188249397277832, "step": 26350 }, { "epoch": 0.2127814147219554, "grad_norm": 0.7655831575393677, "learning_rate": 1.575470707494972e-05, "loss": 3.3152847290039062, "step": 26360 }, { "epoch": 0.21286213604772244, "grad_norm": 0.8104836940765381, "learning_rate": 1.5753091605211504e-05, "loss": 2.7371376037597654, "step": 26370 }, { "epoch": 0.2129428573734895, "grad_norm": 1.1327919960021973, "learning_rate": 1.5751476135473295e-05, "loss": 3.350865936279297, "step": 26380 }, { "epoch": 0.21302357869925656, "grad_norm": 1.0011677742004395, "learning_rate": 1.574986066573508e-05, "loss": 3.776154327392578, "step": 26390 }, { "epoch": 0.21310430002502362, "grad_norm": 0.8797413110733032, "learning_rate": 1.5748245195996867e-05, "loss": 3.038040542602539, "step": 26400 }, { "epoch": 0.21318502135079068, "grad_norm": 1.1193581819534302, "learning_rate": 1.5746629726258655e-05, "loss": 3.5103836059570312, "step": 26410 }, { "epoch": 0.2132657426765577, "grad_norm": 1.4907106161117554, "learning_rate": 1.5745014256520443e-05, "loss": 3.1152807235717774, "step": 26420 }, { "epoch": 0.21334646400232476, "grad_norm": 0.8224161863327026, "learning_rate": 1.5743398786782227e-05, "loss": 3.056907081604004, "step": 26430 }, { "epoch": 0.21342718532809182, "grad_norm": 1.0421112775802612, "learning_rate": 1.5741783317044015e-05, "loss": 2.989522361755371, "step": 26440 }, { "epoch": 0.21350790665385888, "grad_norm": 1.2376927137374878, "learning_rate": 1.5740167847305802e-05, "loss": 3.0730144500732424, "step": 26450 }, { "epoch": 0.21358862797962594, "grad_norm": 0.5636734962463379, "learning_rate": 1.573855237756759e-05, "loss": 3.1178314208984377, "step": 26460 }, { "epoch": 0.213669349305393, "grad_norm": 0.9512313604354858, "learning_rate": 1.5736936907829374e-05, "loss": 3.080440902709961, "step": 26470 }, { "epoch": 0.21375007063116006, "grad_norm": 1.164845585823059, "learning_rate": 1.5735321438091162e-05, "loss": 2.8637615203857423, "step": 26480 }, { "epoch": 0.2138307919569271, "grad_norm": 0.6036603450775146, "learning_rate": 1.573370596835295e-05, "loss": 3.0214839935302735, "step": 26490 }, { "epoch": 0.21391151328269414, "grad_norm": 0.9458022713661194, "learning_rate": 1.5732090498614737e-05, "loss": 3.2704647064208983, "step": 26500 }, { "epoch": 0.2139922346084612, "grad_norm": 0.6968720555305481, "learning_rate": 1.573047502887652e-05, "loss": 3.3339248657226563, "step": 26510 }, { "epoch": 0.21407295593422826, "grad_norm": 1.377833366394043, "learning_rate": 1.572885955913831e-05, "loss": 3.025181770324707, "step": 26520 }, { "epoch": 0.21415367725999532, "grad_norm": 1.373055100440979, "learning_rate": 1.5727244089400097e-05, "loss": 3.6107696533203124, "step": 26530 }, { "epoch": 0.21423439858576238, "grad_norm": 0.9356703162193298, "learning_rate": 1.5725628619661885e-05, "loss": 3.1862424850463866, "step": 26540 }, { "epoch": 0.21431511991152943, "grad_norm": 0.8262930512428284, "learning_rate": 1.5724013149923672e-05, "loss": 3.047212600708008, "step": 26550 }, { "epoch": 0.2143958412372965, "grad_norm": 0.5415867567062378, "learning_rate": 1.5722397680185457e-05, "loss": 2.9245878219604493, "step": 26560 }, { "epoch": 0.21447656256306352, "grad_norm": 0.5152186751365662, "learning_rate": 1.5720782210447244e-05, "loss": 3.657674026489258, "step": 26570 }, { "epoch": 0.21455728388883058, "grad_norm": 1.6222506761550903, "learning_rate": 1.5719166740709032e-05, "loss": 3.603556442260742, "step": 26580 }, { "epoch": 0.21463800521459764, "grad_norm": 1.2422047853469849, "learning_rate": 1.571755127097082e-05, "loss": 3.1794193267822264, "step": 26590 }, { "epoch": 0.2147187265403647, "grad_norm": 0.864494800567627, "learning_rate": 1.5715935801232604e-05, "loss": 2.6607154846191405, "step": 26600 }, { "epoch": 0.21479944786613175, "grad_norm": 0.9071939587593079, "learning_rate": 1.5714320331494392e-05, "loss": 2.8743318557739257, "step": 26610 }, { "epoch": 0.2148801691918988, "grad_norm": 1.0150372982025146, "learning_rate": 1.571270486175618e-05, "loss": 3.2199832916259767, "step": 26620 }, { "epoch": 0.21496089051766587, "grad_norm": 1.1920711994171143, "learning_rate": 1.5711089392017967e-05, "loss": 3.5333106994628904, "step": 26630 }, { "epoch": 0.21504161184343293, "grad_norm": 0.9668617248535156, "learning_rate": 1.570947392227975e-05, "loss": 3.282174301147461, "step": 26640 }, { "epoch": 0.21512233316919996, "grad_norm": 1.0364750623703003, "learning_rate": 1.570785845254154e-05, "loss": 3.1516435623168944, "step": 26650 }, { "epoch": 0.21520305449496702, "grad_norm": 1.0185325145721436, "learning_rate": 1.5706242982803327e-05, "loss": 3.1067388534545897, "step": 26660 }, { "epoch": 0.21528377582073407, "grad_norm": 1.1871604919433594, "learning_rate": 1.5704627513065115e-05, "loss": 3.471052551269531, "step": 26670 }, { "epoch": 0.21536449714650113, "grad_norm": 1.5768588781356812, "learning_rate": 1.57030120433269e-05, "loss": 3.1050920486450195, "step": 26680 }, { "epoch": 0.2154452184722682, "grad_norm": 0.9969099760055542, "learning_rate": 1.5701396573588687e-05, "loss": 3.7378562927246093, "step": 26690 }, { "epoch": 0.21552593979803525, "grad_norm": 1.279266119003296, "learning_rate": 1.5699781103850474e-05, "loss": 3.215260696411133, "step": 26700 }, { "epoch": 0.2156066611238023, "grad_norm": 0.6476396918296814, "learning_rate": 1.5698165634112262e-05, "loss": 3.3946151733398438, "step": 26710 }, { "epoch": 0.21568738244956936, "grad_norm": 1.095795750617981, "learning_rate": 1.5696550164374046e-05, "loss": 3.11148624420166, "step": 26720 }, { "epoch": 0.2157681037753364, "grad_norm": 0.9092044234275818, "learning_rate": 1.5694934694635834e-05, "loss": 3.6309326171875, "step": 26730 }, { "epoch": 0.21584882510110345, "grad_norm": 1.3051785230636597, "learning_rate": 1.569331922489762e-05, "loss": 3.0742071151733397, "step": 26740 }, { "epoch": 0.2159295464268705, "grad_norm": 1.4406025409698486, "learning_rate": 1.569170375515941e-05, "loss": 3.108966064453125, "step": 26750 }, { "epoch": 0.21601026775263757, "grad_norm": 1.3944896459579468, "learning_rate": 1.5690088285421194e-05, "loss": 3.2437294006347654, "step": 26760 }, { "epoch": 0.21609098907840463, "grad_norm": 1.080879807472229, "learning_rate": 1.568847281568298e-05, "loss": 3.2666465759277346, "step": 26770 }, { "epoch": 0.21617171040417169, "grad_norm": 1.0822241306304932, "learning_rate": 1.568685734594477e-05, "loss": 3.3404354095458983, "step": 26780 }, { "epoch": 0.21625243172993874, "grad_norm": 0.6031043529510498, "learning_rate": 1.5685241876206557e-05, "loss": 3.027128791809082, "step": 26790 }, { "epoch": 0.21633315305570577, "grad_norm": 1.4449955224990845, "learning_rate": 1.568362640646834e-05, "loss": 2.910160446166992, "step": 26800 }, { "epoch": 0.21641387438147283, "grad_norm": 0.7737096548080444, "learning_rate": 1.568201093673013e-05, "loss": 2.8072412490844725, "step": 26810 }, { "epoch": 0.2164945957072399, "grad_norm": 1.056820034980774, "learning_rate": 1.5680395466991916e-05, "loss": 3.067550468444824, "step": 26820 }, { "epoch": 0.21657531703300695, "grad_norm": 0.8033737540245056, "learning_rate": 1.5678779997253704e-05, "loss": 2.9368188858032225, "step": 26830 }, { "epoch": 0.216656038358774, "grad_norm": 0.7742269039154053, "learning_rate": 1.567716452751549e-05, "loss": 3.1331146240234373, "step": 26840 }, { "epoch": 0.21673675968454106, "grad_norm": 1.4923347234725952, "learning_rate": 1.5675549057777276e-05, "loss": 3.104203987121582, "step": 26850 }, { "epoch": 0.21681748101030812, "grad_norm": 2.5168063640594482, "learning_rate": 1.5673933588039064e-05, "loss": 3.6408222198486326, "step": 26860 }, { "epoch": 0.21689820233607518, "grad_norm": 1.3051451444625854, "learning_rate": 1.567231811830085e-05, "loss": 2.7636930465698244, "step": 26870 }, { "epoch": 0.2169789236618422, "grad_norm": 0.8212274312973022, "learning_rate": 1.5670702648562636e-05, "loss": 3.20178337097168, "step": 26880 }, { "epoch": 0.21705964498760927, "grad_norm": 1.0592418909072876, "learning_rate": 1.5669087178824423e-05, "loss": 3.209359359741211, "step": 26890 }, { "epoch": 0.21714036631337633, "grad_norm": 0.9154030084609985, "learning_rate": 1.566747170908621e-05, "loss": 3.086195373535156, "step": 26900 }, { "epoch": 0.21722108763914338, "grad_norm": 0.9705712199211121, "learning_rate": 1.5665856239348e-05, "loss": 3.3401020050048826, "step": 26910 }, { "epoch": 0.21730180896491044, "grad_norm": 0.6570797562599182, "learning_rate": 1.5664240769609783e-05, "loss": 3.178059959411621, "step": 26920 }, { "epoch": 0.2173825302906775, "grad_norm": 0.6258926391601562, "learning_rate": 1.566262529987157e-05, "loss": 3.145083999633789, "step": 26930 }, { "epoch": 0.21746325161644456, "grad_norm": 0.9688103795051575, "learning_rate": 1.566100983013336e-05, "loss": 2.9074831008911133, "step": 26940 }, { "epoch": 0.21754397294221162, "grad_norm": 0.824474573135376, "learning_rate": 1.5659394360395146e-05, "loss": 3.0993673324584963, "step": 26950 }, { "epoch": 0.21762469426797865, "grad_norm": 1.0754348039627075, "learning_rate": 1.565777889065693e-05, "loss": 2.733714294433594, "step": 26960 }, { "epoch": 0.2177054155937457, "grad_norm": 0.7681884169578552, "learning_rate": 1.5656163420918718e-05, "loss": 2.939097213745117, "step": 26970 }, { "epoch": 0.21778613691951276, "grad_norm": 1.652483344078064, "learning_rate": 1.5654547951180506e-05, "loss": 2.9610769271850588, "step": 26980 }, { "epoch": 0.21786685824527982, "grad_norm": 0.9920288324356079, "learning_rate": 1.5652932481442294e-05, "loss": 3.5448623657226563, "step": 26990 }, { "epoch": 0.21794757957104688, "grad_norm": 0.8509569764137268, "learning_rate": 1.5651317011704078e-05, "loss": 3.886212921142578, "step": 27000 }, { "epoch": 0.21802830089681394, "grad_norm": 0.9755154848098755, "learning_rate": 1.5649701541965866e-05, "loss": 3.326393890380859, "step": 27010 }, { "epoch": 0.218109022222581, "grad_norm": 1.047716498374939, "learning_rate": 1.5648086072227653e-05, "loss": 2.850225067138672, "step": 27020 }, { "epoch": 0.21818974354834803, "grad_norm": 0.9277932047843933, "learning_rate": 1.564647060248944e-05, "loss": 3.4812667846679686, "step": 27030 }, { "epoch": 0.21827046487411508, "grad_norm": 0.7759116888046265, "learning_rate": 1.5644855132751225e-05, "loss": 2.7935104370117188, "step": 27040 }, { "epoch": 0.21835118619988214, "grad_norm": 0.7221095561981201, "learning_rate": 1.5643239663013013e-05, "loss": 3.156010055541992, "step": 27050 }, { "epoch": 0.2184319075256492, "grad_norm": 1.4858179092407227, "learning_rate": 1.56416241932748e-05, "loss": 3.367002487182617, "step": 27060 }, { "epoch": 0.21851262885141626, "grad_norm": 0.749954879283905, "learning_rate": 1.564000872353659e-05, "loss": 2.968985366821289, "step": 27070 }, { "epoch": 0.21859335017718332, "grad_norm": 0.6981998085975647, "learning_rate": 1.5638393253798373e-05, "loss": 3.029959869384766, "step": 27080 }, { "epoch": 0.21867407150295037, "grad_norm": 1.3820682764053345, "learning_rate": 1.563677778406016e-05, "loss": 2.8999820709228517, "step": 27090 }, { "epoch": 0.21875479282871743, "grad_norm": 0.947094202041626, "learning_rate": 1.5635162314321948e-05, "loss": 3.1752614974975586, "step": 27100 }, { "epoch": 0.21883551415448446, "grad_norm": 0.6577728986740112, "learning_rate": 1.5633546844583736e-05, "loss": 3.591505432128906, "step": 27110 }, { "epoch": 0.21891623548025152, "grad_norm": 1.5484681129455566, "learning_rate": 1.563193137484552e-05, "loss": 2.9697898864746093, "step": 27120 }, { "epoch": 0.21899695680601858, "grad_norm": 1.335992693901062, "learning_rate": 1.563031590510731e-05, "loss": 3.1341068267822267, "step": 27130 }, { "epoch": 0.21907767813178564, "grad_norm": 0.7550294995307922, "learning_rate": 1.5628700435369095e-05, "loss": 3.068496894836426, "step": 27140 }, { "epoch": 0.2191583994575527, "grad_norm": 1.502482295036316, "learning_rate": 1.5627084965630883e-05, "loss": 3.0225086212158203, "step": 27150 }, { "epoch": 0.21923912078331975, "grad_norm": 0.5806695222854614, "learning_rate": 1.5625469495892667e-05, "loss": 3.430340576171875, "step": 27160 }, { "epoch": 0.2193198421090868, "grad_norm": 0.8175938725471497, "learning_rate": 1.562385402615446e-05, "loss": 3.3715423583984374, "step": 27170 }, { "epoch": 0.21940056343485384, "grad_norm": 1.075766921043396, "learning_rate": 1.5622238556416243e-05, "loss": 3.074481201171875, "step": 27180 }, { "epoch": 0.2194812847606209, "grad_norm": 1.4181630611419678, "learning_rate": 1.562062308667803e-05, "loss": 3.349375915527344, "step": 27190 }, { "epoch": 0.21956200608638796, "grad_norm": 1.1002821922302246, "learning_rate": 1.5619007616939815e-05, "loss": 2.9015134811401366, "step": 27200 }, { "epoch": 0.21964272741215501, "grad_norm": 0.8813608288764954, "learning_rate": 1.5617392147201606e-05, "loss": 3.4280979156494142, "step": 27210 }, { "epoch": 0.21972344873792207, "grad_norm": 0.8614421486854553, "learning_rate": 1.561577667746339e-05, "loss": 3.139096260070801, "step": 27220 }, { "epoch": 0.21980417006368913, "grad_norm": 1.7195130586624146, "learning_rate": 1.5614161207725178e-05, "loss": 3.636074447631836, "step": 27230 }, { "epoch": 0.2198848913894562, "grad_norm": 0.9196454286575317, "learning_rate": 1.5612545737986962e-05, "loss": 3.444045639038086, "step": 27240 }, { "epoch": 0.21996561271522325, "grad_norm": 0.7763029932975769, "learning_rate": 1.5610930268248753e-05, "loss": 3.2996189117431642, "step": 27250 }, { "epoch": 0.22004633404099028, "grad_norm": 1.2867933511734009, "learning_rate": 1.5609314798510538e-05, "loss": 3.3139602661132814, "step": 27260 }, { "epoch": 0.22012705536675733, "grad_norm": 0.7388172745704651, "learning_rate": 1.5607699328772325e-05, "loss": 3.5995540618896484, "step": 27270 }, { "epoch": 0.2202077766925244, "grad_norm": 1.3976937532424927, "learning_rate": 1.5606083859034113e-05, "loss": 3.1068727493286135, "step": 27280 }, { "epoch": 0.22028849801829145, "grad_norm": 1.5883963108062744, "learning_rate": 1.56044683892959e-05, "loss": 3.6015625, "step": 27290 }, { "epoch": 0.2203692193440585, "grad_norm": 0.6038281917572021, "learning_rate": 1.5602852919557685e-05, "loss": 3.5991195678710937, "step": 27300 }, { "epoch": 0.22044994066982557, "grad_norm": 0.6938085556030273, "learning_rate": 1.5601237449819473e-05, "loss": 2.691168022155762, "step": 27310 }, { "epoch": 0.22053066199559263, "grad_norm": 1.6989086866378784, "learning_rate": 1.559962198008126e-05, "loss": 3.0035104751586914, "step": 27320 }, { "epoch": 0.22061138332135968, "grad_norm": 0.8587160706520081, "learning_rate": 1.5598006510343048e-05, "loss": 3.748285675048828, "step": 27330 }, { "epoch": 0.2206921046471267, "grad_norm": 1.1064302921295166, "learning_rate": 1.5596391040604832e-05, "loss": 2.9584888458251952, "step": 27340 }, { "epoch": 0.22077282597289377, "grad_norm": 1.06569242477417, "learning_rate": 1.559477557086662e-05, "loss": 3.347403335571289, "step": 27350 }, { "epoch": 0.22085354729866083, "grad_norm": 1.2141892910003662, "learning_rate": 1.5593160101128408e-05, "loss": 3.066924476623535, "step": 27360 }, { "epoch": 0.2209342686244279, "grad_norm": 1.017633318901062, "learning_rate": 1.5591544631390195e-05, "loss": 3.159153366088867, "step": 27370 }, { "epoch": 0.22101498995019495, "grad_norm": 0.9421135187149048, "learning_rate": 1.558992916165198e-05, "loss": 3.425338363647461, "step": 27380 }, { "epoch": 0.221095711275962, "grad_norm": 0.899530827999115, "learning_rate": 1.5588313691913767e-05, "loss": 3.13861141204834, "step": 27390 }, { "epoch": 0.22117643260172906, "grad_norm": 0.9237741231918335, "learning_rate": 1.5586698222175555e-05, "loss": 3.2288475036621094, "step": 27400 }, { "epoch": 0.2212571539274961, "grad_norm": 0.6546847224235535, "learning_rate": 1.5585082752437343e-05, "loss": 3.3258785247802733, "step": 27410 }, { "epoch": 0.22133787525326315, "grad_norm": 0.7302932739257812, "learning_rate": 1.5583467282699127e-05, "loss": 3.267515182495117, "step": 27420 }, { "epoch": 0.2214185965790302, "grad_norm": 1.110649824142456, "learning_rate": 1.5581851812960915e-05, "loss": 3.0566884994506838, "step": 27430 }, { "epoch": 0.22149931790479727, "grad_norm": 0.645240068435669, "learning_rate": 1.5580236343222702e-05, "loss": 3.3343902587890626, "step": 27440 }, { "epoch": 0.22158003923056432, "grad_norm": 0.8610450625419617, "learning_rate": 1.557862087348449e-05, "loss": 2.925890350341797, "step": 27450 }, { "epoch": 0.22166076055633138, "grad_norm": 1.2565821409225464, "learning_rate": 1.5577005403746274e-05, "loss": 3.1556528091430662, "step": 27460 }, { "epoch": 0.22174148188209844, "grad_norm": 1.2499115467071533, "learning_rate": 1.5575389934008062e-05, "loss": 3.2158206939697265, "step": 27470 }, { "epoch": 0.2218222032078655, "grad_norm": 1.3081297874450684, "learning_rate": 1.557377446426985e-05, "loss": 3.2739883422851563, "step": 27480 }, { "epoch": 0.22190292453363253, "grad_norm": 1.1660387516021729, "learning_rate": 1.5572158994531638e-05, "loss": 3.116009330749512, "step": 27490 }, { "epoch": 0.2219836458593996, "grad_norm": 1.500712275505066, "learning_rate": 1.5570543524793422e-05, "loss": 2.781157684326172, "step": 27500 }, { "epoch": 0.22206436718516664, "grad_norm": 1.0943403244018555, "learning_rate": 1.556892805505521e-05, "loss": 3.175373840332031, "step": 27510 }, { "epoch": 0.2221450885109337, "grad_norm": 0.6220307350158691, "learning_rate": 1.5567312585316997e-05, "loss": 3.0502094268798827, "step": 27520 }, { "epoch": 0.22222580983670076, "grad_norm": 0.8261995911598206, "learning_rate": 1.5565697115578785e-05, "loss": 2.9757558822631838, "step": 27530 }, { "epoch": 0.22230653116246782, "grad_norm": 0.863796055316925, "learning_rate": 1.556408164584057e-05, "loss": 3.252825927734375, "step": 27540 }, { "epoch": 0.22238725248823488, "grad_norm": 1.008484125137329, "learning_rate": 1.5562466176102357e-05, "loss": 2.7834566116333006, "step": 27550 }, { "epoch": 0.22246797381400193, "grad_norm": 0.6507522463798523, "learning_rate": 1.5560850706364145e-05, "loss": 3.222906494140625, "step": 27560 }, { "epoch": 0.22254869513976897, "grad_norm": 1.3976366519927979, "learning_rate": 1.5559235236625932e-05, "loss": 3.0331119537353515, "step": 27570 }, { "epoch": 0.22262941646553602, "grad_norm": 1.336549162864685, "learning_rate": 1.5557619766887717e-05, "loss": 3.5066593170166014, "step": 27580 }, { "epoch": 0.22271013779130308, "grad_norm": 1.3515123128890991, "learning_rate": 1.5556004297149504e-05, "loss": 3.26075439453125, "step": 27590 }, { "epoch": 0.22279085911707014, "grad_norm": 1.0663235187530518, "learning_rate": 1.5554388827411292e-05, "loss": 3.320547103881836, "step": 27600 }, { "epoch": 0.2228715804428372, "grad_norm": 0.5799064040184021, "learning_rate": 1.555277335767308e-05, "loss": 3.0573144912719727, "step": 27610 }, { "epoch": 0.22295230176860426, "grad_norm": 1.1438508033752441, "learning_rate": 1.5551157887934864e-05, "loss": 3.065822410583496, "step": 27620 }, { "epoch": 0.2230330230943713, "grad_norm": 1.0635933876037598, "learning_rate": 1.5549542418196652e-05, "loss": 3.1640436172485353, "step": 27630 }, { "epoch": 0.22311374442013834, "grad_norm": 1.245566964149475, "learning_rate": 1.554792694845844e-05, "loss": 3.4542407989501953, "step": 27640 }, { "epoch": 0.2231944657459054, "grad_norm": 1.14785897731781, "learning_rate": 1.5546311478720227e-05, "loss": 3.0123922348022463, "step": 27650 }, { "epoch": 0.22327518707167246, "grad_norm": 0.621732771396637, "learning_rate": 1.554469600898201e-05, "loss": 3.238106918334961, "step": 27660 }, { "epoch": 0.22335590839743952, "grad_norm": 1.2712684869766235, "learning_rate": 1.55430805392438e-05, "loss": 3.0271059036254884, "step": 27670 }, { "epoch": 0.22343662972320658, "grad_norm": 0.7893569469451904, "learning_rate": 1.5541465069505587e-05, "loss": 2.9523462295532226, "step": 27680 }, { "epoch": 0.22351735104897363, "grad_norm": 0.5295010805130005, "learning_rate": 1.5539849599767374e-05, "loss": 3.1716630935668944, "step": 27690 }, { "epoch": 0.2235980723747407, "grad_norm": 0.8364993929862976, "learning_rate": 1.553823413002916e-05, "loss": 3.592279052734375, "step": 27700 }, { "epoch": 0.22367879370050775, "grad_norm": 1.0905132293701172, "learning_rate": 1.5536618660290946e-05, "loss": 3.636008071899414, "step": 27710 }, { "epoch": 0.22375951502627478, "grad_norm": 0.7960389852523804, "learning_rate": 1.5535003190552734e-05, "loss": 2.617436981201172, "step": 27720 }, { "epoch": 0.22384023635204184, "grad_norm": 1.3545634746551514, "learning_rate": 1.5533387720814522e-05, "loss": 3.3338176727294924, "step": 27730 }, { "epoch": 0.2239209576778089, "grad_norm": 1.1305451393127441, "learning_rate": 1.5531772251076306e-05, "loss": 2.924452209472656, "step": 27740 }, { "epoch": 0.22400167900357595, "grad_norm": 1.0177909135818481, "learning_rate": 1.5530156781338094e-05, "loss": 3.4991901397705076, "step": 27750 }, { "epoch": 0.224082400329343, "grad_norm": 1.15743088722229, "learning_rate": 1.552854131159988e-05, "loss": 3.1406721115112304, "step": 27760 }, { "epoch": 0.22416312165511007, "grad_norm": 1.6253695487976074, "learning_rate": 1.552692584186167e-05, "loss": 3.0189239501953127, "step": 27770 }, { "epoch": 0.22424384298087713, "grad_norm": 1.1309318542480469, "learning_rate": 1.5525310372123454e-05, "loss": 3.131789779663086, "step": 27780 }, { "epoch": 0.22432456430664416, "grad_norm": 1.0709842443466187, "learning_rate": 1.552369490238524e-05, "loss": 3.4554290771484375, "step": 27790 }, { "epoch": 0.22440528563241122, "grad_norm": 0.6364173889160156, "learning_rate": 1.552207943264703e-05, "loss": 3.3494827270507814, "step": 27800 }, { "epoch": 0.22448600695817827, "grad_norm": 0.607870876789093, "learning_rate": 1.5520463962908817e-05, "loss": 3.128477096557617, "step": 27810 }, { "epoch": 0.22456672828394533, "grad_norm": 1.294471263885498, "learning_rate": 1.5518848493170604e-05, "loss": 2.6029550552368166, "step": 27820 }, { "epoch": 0.2246474496097124, "grad_norm": 0.8512957096099854, "learning_rate": 1.551723302343239e-05, "loss": 2.9847537994384767, "step": 27830 }, { "epoch": 0.22472817093547945, "grad_norm": 0.8954240083694458, "learning_rate": 1.5515617553694176e-05, "loss": 2.7010147094726564, "step": 27840 }, { "epoch": 0.2248088922612465, "grad_norm": 0.8120560646057129, "learning_rate": 1.5514002083955964e-05, "loss": 3.313295364379883, "step": 27850 }, { "epoch": 0.22488961358701356, "grad_norm": 1.4684664011001587, "learning_rate": 1.551238661421775e-05, "loss": 3.239004898071289, "step": 27860 }, { "epoch": 0.2249703349127806, "grad_norm": 1.2879917621612549, "learning_rate": 1.5510771144479536e-05, "loss": 3.248587417602539, "step": 27870 }, { "epoch": 0.22505105623854765, "grad_norm": 0.8572762608528137, "learning_rate": 1.5509155674741324e-05, "loss": 3.229111099243164, "step": 27880 }, { "epoch": 0.2251317775643147, "grad_norm": 1.352579951286316, "learning_rate": 1.550754020500311e-05, "loss": 3.1820003509521486, "step": 27890 }, { "epoch": 0.22521249889008177, "grad_norm": 1.0177876949310303, "learning_rate": 1.55059247352649e-05, "loss": 2.9938720703125, "step": 27900 }, { "epoch": 0.22529322021584883, "grad_norm": 0.8176954388618469, "learning_rate": 1.5504309265526683e-05, "loss": 3.258790969848633, "step": 27910 }, { "epoch": 0.22537394154161589, "grad_norm": 0.8277166485786438, "learning_rate": 1.550269379578847e-05, "loss": 3.267047119140625, "step": 27920 }, { "epoch": 0.22545466286738294, "grad_norm": 0.9229332208633423, "learning_rate": 1.550107832605026e-05, "loss": 3.371352767944336, "step": 27930 }, { "epoch": 0.22553538419315, "grad_norm": 0.971081554889679, "learning_rate": 1.5499462856312046e-05, "loss": 3.001968002319336, "step": 27940 }, { "epoch": 0.22561610551891703, "grad_norm": 0.7371448874473572, "learning_rate": 1.549784738657383e-05, "loss": 2.870077896118164, "step": 27950 }, { "epoch": 0.2256968268446841, "grad_norm": 0.7870156168937683, "learning_rate": 1.549623191683562e-05, "loss": 3.4995849609375, "step": 27960 }, { "epoch": 0.22577754817045115, "grad_norm": 0.9452940821647644, "learning_rate": 1.5494616447097406e-05, "loss": 3.230733871459961, "step": 27970 }, { "epoch": 0.2258582694962182, "grad_norm": 0.8584502935409546, "learning_rate": 1.5493000977359194e-05, "loss": 3.564556121826172, "step": 27980 }, { "epoch": 0.22593899082198526, "grad_norm": 0.8436158895492554, "learning_rate": 1.5491385507620978e-05, "loss": 3.3361934661865233, "step": 27990 }, { "epoch": 0.22601971214775232, "grad_norm": 0.6909319758415222, "learning_rate": 1.548977003788277e-05, "loss": 3.24761848449707, "step": 28000 }, { "epoch": 0.22610043347351938, "grad_norm": 0.6656150221824646, "learning_rate": 1.5488154568144554e-05, "loss": 3.406892013549805, "step": 28010 }, { "epoch": 0.2261811547992864, "grad_norm": 1.3030903339385986, "learning_rate": 1.548653909840634e-05, "loss": 3.278856658935547, "step": 28020 }, { "epoch": 0.22626187612505347, "grad_norm": 0.9843378067016602, "learning_rate": 1.5484923628668126e-05, "loss": 3.136921501159668, "step": 28030 }, { "epoch": 0.22634259745082053, "grad_norm": 1.1655802726745605, "learning_rate": 1.5483308158929917e-05, "loss": 3.3777061462402345, "step": 28040 }, { "epoch": 0.22642331877658758, "grad_norm": 0.8840271234512329, "learning_rate": 1.54816926891917e-05, "loss": 3.104554557800293, "step": 28050 }, { "epoch": 0.22650404010235464, "grad_norm": 1.035629153251648, "learning_rate": 1.548007721945349e-05, "loss": 3.211068344116211, "step": 28060 }, { "epoch": 0.2265847614281217, "grad_norm": 0.9597824215888977, "learning_rate": 1.5478461749715273e-05, "loss": 3.1214460372924804, "step": 28070 }, { "epoch": 0.22666548275388876, "grad_norm": 0.9737723469734192, "learning_rate": 1.5476846279977064e-05, "loss": 3.443276596069336, "step": 28080 }, { "epoch": 0.22674620407965582, "grad_norm": 1.1428446769714355, "learning_rate": 1.5475230810238848e-05, "loss": 3.274962615966797, "step": 28090 }, { "epoch": 0.22682692540542285, "grad_norm": 1.679940938949585, "learning_rate": 1.5473615340500636e-05, "loss": 3.7309123992919924, "step": 28100 }, { "epoch": 0.2269076467311899, "grad_norm": 1.0217043161392212, "learning_rate": 1.547199987076242e-05, "loss": 3.2154525756835937, "step": 28110 }, { "epoch": 0.22698836805695696, "grad_norm": 0.7639161944389343, "learning_rate": 1.547038440102421e-05, "loss": 2.8810712814331056, "step": 28120 }, { "epoch": 0.22706908938272402, "grad_norm": 0.8537403345108032, "learning_rate": 1.5468768931285996e-05, "loss": 2.83756046295166, "step": 28130 }, { "epoch": 0.22714981070849108, "grad_norm": 0.835923969745636, "learning_rate": 1.5467153461547783e-05, "loss": 3.5298553466796876, "step": 28140 }, { "epoch": 0.22723053203425814, "grad_norm": 1.4240297079086304, "learning_rate": 1.546553799180957e-05, "loss": 3.064924430847168, "step": 28150 }, { "epoch": 0.2273112533600252, "grad_norm": 0.8973989486694336, "learning_rate": 1.546392252207136e-05, "loss": 3.449513626098633, "step": 28160 }, { "epoch": 0.22739197468579225, "grad_norm": 2.059420108795166, "learning_rate": 1.5462307052333143e-05, "loss": 3.2612571716308594, "step": 28170 }, { "epoch": 0.22747269601155928, "grad_norm": 0.9745517373085022, "learning_rate": 1.546069158259493e-05, "loss": 2.908977508544922, "step": 28180 }, { "epoch": 0.22755341733732634, "grad_norm": 1.6985821723937988, "learning_rate": 1.545907611285672e-05, "loss": 2.931141662597656, "step": 28190 }, { "epoch": 0.2276341386630934, "grad_norm": 1.13430655002594, "learning_rate": 1.5457460643118506e-05, "loss": 3.2081966400146484, "step": 28200 }, { "epoch": 0.22771485998886046, "grad_norm": 1.0677614212036133, "learning_rate": 1.545584517338029e-05, "loss": 3.215376281738281, "step": 28210 }, { "epoch": 0.22779558131462752, "grad_norm": 0.9890756607055664, "learning_rate": 1.5454229703642078e-05, "loss": 2.696118927001953, "step": 28220 }, { "epoch": 0.22787630264039457, "grad_norm": 0.6502964496612549, "learning_rate": 1.5452614233903866e-05, "loss": 2.9321353912353514, "step": 28230 }, { "epoch": 0.22795702396616163, "grad_norm": 1.3728779554367065, "learning_rate": 1.5450998764165654e-05, "loss": 2.925637054443359, "step": 28240 }, { "epoch": 0.22803774529192866, "grad_norm": 0.901160717010498, "learning_rate": 1.5449383294427438e-05, "loss": 2.775476264953613, "step": 28250 }, { "epoch": 0.22811846661769572, "grad_norm": 3.135728597640991, "learning_rate": 1.5447767824689226e-05, "loss": 3.3693874359130858, "step": 28260 }, { "epoch": 0.22819918794346278, "grad_norm": 1.0534814596176147, "learning_rate": 1.5446152354951013e-05, "loss": 3.3003101348876953, "step": 28270 }, { "epoch": 0.22827990926922984, "grad_norm": 1.5614588260650635, "learning_rate": 1.54445368852128e-05, "loss": 3.760497283935547, "step": 28280 }, { "epoch": 0.2283606305949969, "grad_norm": 2.590092897415161, "learning_rate": 1.5442921415474585e-05, "loss": 3.5364498138427733, "step": 28290 }, { "epoch": 0.22844135192076395, "grad_norm": 1.0379834175109863, "learning_rate": 1.5441305945736373e-05, "loss": 3.690043640136719, "step": 28300 }, { "epoch": 0.228522073246531, "grad_norm": 0.975375771522522, "learning_rate": 1.543969047599816e-05, "loss": 3.0296909332275392, "step": 28310 }, { "epoch": 0.22860279457229807, "grad_norm": 0.8480278849601746, "learning_rate": 1.5438075006259948e-05, "loss": 3.049112892150879, "step": 28320 }, { "epoch": 0.2286835158980651, "grad_norm": 0.6906249523162842, "learning_rate": 1.5436459536521733e-05, "loss": 3.1013607025146483, "step": 28330 }, { "epoch": 0.22876423722383216, "grad_norm": 0.8217745423316956, "learning_rate": 1.543484406678352e-05, "loss": 3.5969898223876955, "step": 28340 }, { "epoch": 0.22884495854959921, "grad_norm": 0.8726729154586792, "learning_rate": 1.5433228597045308e-05, "loss": 3.3440872192382813, "step": 28350 }, { "epoch": 0.22892567987536627, "grad_norm": 0.9969680309295654, "learning_rate": 1.5431613127307096e-05, "loss": 2.8805187225341795, "step": 28360 }, { "epoch": 0.22900640120113333, "grad_norm": 0.8073418736457825, "learning_rate": 1.542999765756888e-05, "loss": 2.7989660263061524, "step": 28370 }, { "epoch": 0.2290871225269004, "grad_norm": 1.8466132879257202, "learning_rate": 1.5428382187830668e-05, "loss": 3.3504409790039062, "step": 28380 }, { "epoch": 0.22916784385266745, "grad_norm": 0.8366830945014954, "learning_rate": 1.5426766718092455e-05, "loss": 2.9731510162353514, "step": 28390 }, { "epoch": 0.22924856517843448, "grad_norm": 1.0223546028137207, "learning_rate": 1.5425151248354243e-05, "loss": 3.4629932403564454, "step": 28400 }, { "epoch": 0.22932928650420153, "grad_norm": 0.9355332255363464, "learning_rate": 1.5423535778616027e-05, "loss": 3.185639190673828, "step": 28410 }, { "epoch": 0.2294100078299686, "grad_norm": 0.5939189791679382, "learning_rate": 1.5421920308877815e-05, "loss": 3.1317670822143553, "step": 28420 }, { "epoch": 0.22949072915573565, "grad_norm": 1.2942180633544922, "learning_rate": 1.5420304839139603e-05, "loss": 3.179498291015625, "step": 28430 }, { "epoch": 0.2295714504815027, "grad_norm": 0.9143487811088562, "learning_rate": 1.541868936940139e-05, "loss": 3.346014404296875, "step": 28440 }, { "epoch": 0.22965217180726977, "grad_norm": 0.9171983003616333, "learning_rate": 1.5417073899663175e-05, "loss": 3.014133262634277, "step": 28450 }, { "epoch": 0.22973289313303683, "grad_norm": 1.0001251697540283, "learning_rate": 1.5415458429924962e-05, "loss": 3.2438404083251955, "step": 28460 }, { "epoch": 0.22981361445880388, "grad_norm": 0.7964974641799927, "learning_rate": 1.541384296018675e-05, "loss": 3.239054870605469, "step": 28470 }, { "epoch": 0.2298943357845709, "grad_norm": 1.0932023525238037, "learning_rate": 1.5412227490448538e-05, "loss": 2.9609466552734376, "step": 28480 }, { "epoch": 0.22997505711033797, "grad_norm": 1.1077659130096436, "learning_rate": 1.5410612020710322e-05, "loss": 3.0370223999023436, "step": 28490 }, { "epoch": 0.23005577843610503, "grad_norm": 0.969197690486908, "learning_rate": 1.540899655097211e-05, "loss": 3.194670867919922, "step": 28500 }, { "epoch": 0.2301364997618721, "grad_norm": 1.2394617795944214, "learning_rate": 1.5407381081233898e-05, "loss": 2.90877742767334, "step": 28510 }, { "epoch": 0.23021722108763915, "grad_norm": 1.0408549308776855, "learning_rate": 1.5405765611495685e-05, "loss": 2.7712385177612306, "step": 28520 }, { "epoch": 0.2302979424134062, "grad_norm": 1.4718605279922485, "learning_rate": 1.540415014175747e-05, "loss": 3.182717704772949, "step": 28530 }, { "epoch": 0.23037866373917326, "grad_norm": 0.6213049292564392, "learning_rate": 1.5402534672019257e-05, "loss": 3.1670732498168945, "step": 28540 }, { "epoch": 0.23045938506494032, "grad_norm": 0.8586536645889282, "learning_rate": 1.5400919202281045e-05, "loss": 3.206307220458984, "step": 28550 }, { "epoch": 0.23054010639070735, "grad_norm": 1.1716997623443604, "learning_rate": 1.5399303732542833e-05, "loss": 3.509205627441406, "step": 28560 }, { "epoch": 0.2306208277164744, "grad_norm": 1.1592185497283936, "learning_rate": 1.5397688262804617e-05, "loss": 2.8653940200805663, "step": 28570 }, { "epoch": 0.23070154904224147, "grad_norm": 1.3820492029190063, "learning_rate": 1.5396072793066405e-05, "loss": 3.055710220336914, "step": 28580 }, { "epoch": 0.23078227036800852, "grad_norm": 0.9426655173301697, "learning_rate": 1.5394457323328192e-05, "loss": 3.079188346862793, "step": 28590 }, { "epoch": 0.23086299169377558, "grad_norm": 0.8133694529533386, "learning_rate": 1.539284185358998e-05, "loss": 3.143584442138672, "step": 28600 }, { "epoch": 0.23094371301954264, "grad_norm": 0.7943417429924011, "learning_rate": 1.5391226383851764e-05, "loss": 3.092014694213867, "step": 28610 }, { "epoch": 0.2310244343453097, "grad_norm": 1.36107337474823, "learning_rate": 1.5389610914113552e-05, "loss": 3.619436264038086, "step": 28620 }, { "epoch": 0.23110515567107673, "grad_norm": 0.8709272742271423, "learning_rate": 1.538799544437534e-05, "loss": 3.3950077056884767, "step": 28630 }, { "epoch": 0.2311858769968438, "grad_norm": 0.894069492816925, "learning_rate": 1.5386379974637127e-05, "loss": 2.966573143005371, "step": 28640 }, { "epoch": 0.23126659832261084, "grad_norm": 0.7774169445037842, "learning_rate": 1.538476450489891e-05, "loss": 3.1850120544433596, "step": 28650 }, { "epoch": 0.2313473196483779, "grad_norm": 1.102315068244934, "learning_rate": 1.53831490351607e-05, "loss": 2.993979072570801, "step": 28660 }, { "epoch": 0.23142804097414496, "grad_norm": 0.9774245619773865, "learning_rate": 1.5381533565422487e-05, "loss": 2.9078550338745117, "step": 28670 }, { "epoch": 0.23150876229991202, "grad_norm": 0.7966218590736389, "learning_rate": 1.5379918095684275e-05, "loss": 3.28124885559082, "step": 28680 }, { "epoch": 0.23158948362567908, "grad_norm": 0.9239615797996521, "learning_rate": 1.537830262594606e-05, "loss": 3.327130889892578, "step": 28690 }, { "epoch": 0.23167020495144613, "grad_norm": 1.3093619346618652, "learning_rate": 1.5376687156207847e-05, "loss": 3.3330005645751952, "step": 28700 }, { "epoch": 0.23175092627721317, "grad_norm": 1.2868740558624268, "learning_rate": 1.5375071686469634e-05, "loss": 2.910399627685547, "step": 28710 }, { "epoch": 0.23183164760298022, "grad_norm": 1.061335802078247, "learning_rate": 1.5373456216731422e-05, "loss": 3.033242034912109, "step": 28720 }, { "epoch": 0.23191236892874728, "grad_norm": 1.0560780763626099, "learning_rate": 1.5371840746993206e-05, "loss": 2.8783184051513673, "step": 28730 }, { "epoch": 0.23199309025451434, "grad_norm": 1.0793943405151367, "learning_rate": 1.5370225277254994e-05, "loss": 3.250235366821289, "step": 28740 }, { "epoch": 0.2320738115802814, "grad_norm": 1.7877260446548462, "learning_rate": 1.5368609807516782e-05, "loss": 2.924867057800293, "step": 28750 }, { "epoch": 0.23215453290604846, "grad_norm": 0.8734738230705261, "learning_rate": 1.536699433777857e-05, "loss": 3.0213977813720705, "step": 28760 }, { "epoch": 0.2322352542318155, "grad_norm": 0.8880020976066589, "learning_rate": 1.5365378868040354e-05, "loss": 3.188955307006836, "step": 28770 }, { "epoch": 0.23231597555758257, "grad_norm": 1.1385705471038818, "learning_rate": 1.536376339830214e-05, "loss": 2.9208868026733397, "step": 28780 }, { "epoch": 0.2323966968833496, "grad_norm": 1.1437793970108032, "learning_rate": 1.536214792856393e-05, "loss": 3.301270294189453, "step": 28790 }, { "epoch": 0.23247741820911666, "grad_norm": 0.874075710773468, "learning_rate": 1.5360532458825717e-05, "loss": 3.456418991088867, "step": 28800 }, { "epoch": 0.23255813953488372, "grad_norm": 0.8362375497817993, "learning_rate": 1.53589169890875e-05, "loss": 2.646884727478027, "step": 28810 }, { "epoch": 0.23263886086065078, "grad_norm": 1.3630620241165161, "learning_rate": 1.535730151934929e-05, "loss": 3.1980815887451173, "step": 28820 }, { "epoch": 0.23271958218641783, "grad_norm": 1.2134262323379517, "learning_rate": 1.5355686049611077e-05, "loss": 3.8686588287353514, "step": 28830 }, { "epoch": 0.2328003035121849, "grad_norm": 0.9563365578651428, "learning_rate": 1.5354070579872864e-05, "loss": 3.6951412200927733, "step": 28840 }, { "epoch": 0.23288102483795195, "grad_norm": 1.0355626344680786, "learning_rate": 1.535245511013465e-05, "loss": 3.2825904846191407, "step": 28850 }, { "epoch": 0.23296174616371898, "grad_norm": 0.8412178158760071, "learning_rate": 1.5350839640396436e-05, "loss": 3.1319744110107424, "step": 28860 }, { "epoch": 0.23304246748948604, "grad_norm": 1.9890456199645996, "learning_rate": 1.5349224170658224e-05, "loss": 3.115341377258301, "step": 28870 }, { "epoch": 0.2331231888152531, "grad_norm": 1.2727752923965454, "learning_rate": 1.534760870092001e-05, "loss": 3.394107437133789, "step": 28880 }, { "epoch": 0.23320391014102015, "grad_norm": 0.9413354992866516, "learning_rate": 1.5345993231181796e-05, "loss": 3.258121109008789, "step": 28890 }, { "epoch": 0.2332846314667872, "grad_norm": 0.7300459146499634, "learning_rate": 1.5344377761443584e-05, "loss": 2.9864973068237304, "step": 28900 }, { "epoch": 0.23336535279255427, "grad_norm": 1.3183187246322632, "learning_rate": 1.534276229170537e-05, "loss": 3.292746353149414, "step": 28910 }, { "epoch": 0.23344607411832133, "grad_norm": 1.1897926330566406, "learning_rate": 1.534114682196716e-05, "loss": 2.808028793334961, "step": 28920 }, { "epoch": 0.2335267954440884, "grad_norm": 0.9070863127708435, "learning_rate": 1.5339531352228943e-05, "loss": 3.354380416870117, "step": 28930 }, { "epoch": 0.23360751676985542, "grad_norm": 1.3835021257400513, "learning_rate": 1.533791588249073e-05, "loss": 3.1484085083007813, "step": 28940 }, { "epoch": 0.23368823809562247, "grad_norm": 1.241132140159607, "learning_rate": 1.533630041275252e-05, "loss": 3.3740081787109375, "step": 28950 }, { "epoch": 0.23376895942138953, "grad_norm": 0.8404585123062134, "learning_rate": 1.5334684943014306e-05, "loss": 2.843898963928223, "step": 28960 }, { "epoch": 0.2338496807471566, "grad_norm": 0.8320162892341614, "learning_rate": 1.533306947327609e-05, "loss": 3.3730514526367186, "step": 28970 }, { "epoch": 0.23393040207292365, "grad_norm": 0.9767749309539795, "learning_rate": 1.533145400353788e-05, "loss": 3.055088424682617, "step": 28980 }, { "epoch": 0.2340111233986907, "grad_norm": 2.084888219833374, "learning_rate": 1.5329838533799666e-05, "loss": 3.2560752868652343, "step": 28990 }, { "epoch": 0.23409184472445776, "grad_norm": 0.8400686979293823, "learning_rate": 1.5328223064061454e-05, "loss": 3.2131629943847657, "step": 29000 }, { "epoch": 0.2341725660502248, "grad_norm": 1.5032999515533447, "learning_rate": 1.5326607594323238e-05, "loss": 3.324177551269531, "step": 29010 }, { "epoch": 0.23425328737599185, "grad_norm": 1.1274560689926147, "learning_rate": 1.532499212458503e-05, "loss": 2.866817092895508, "step": 29020 }, { "epoch": 0.2343340087017589, "grad_norm": 0.9085372090339661, "learning_rate": 1.5323376654846813e-05, "loss": 2.6402608871459963, "step": 29030 }, { "epoch": 0.23441473002752597, "grad_norm": 0.8237525820732117, "learning_rate": 1.53217611851086e-05, "loss": 3.479134750366211, "step": 29040 }, { "epoch": 0.23449545135329303, "grad_norm": 0.8485758900642395, "learning_rate": 1.5320145715370385e-05, "loss": 2.9235918045043947, "step": 29050 }, { "epoch": 0.23457617267906009, "grad_norm": 1.2611716985702515, "learning_rate": 1.5318530245632177e-05, "loss": 2.9838638305664062, "step": 29060 }, { "epoch": 0.23465689400482714, "grad_norm": 0.7952306270599365, "learning_rate": 1.5316914775893964e-05, "loss": 3.103048324584961, "step": 29070 }, { "epoch": 0.2347376153305942, "grad_norm": 0.9107483625411987, "learning_rate": 1.531529930615575e-05, "loss": 3.6182327270507812, "step": 29080 }, { "epoch": 0.23481833665636123, "grad_norm": 0.9393841028213501, "learning_rate": 1.5313683836417536e-05, "loss": 3.6678367614746095, "step": 29090 }, { "epoch": 0.2348990579821283, "grad_norm": 0.7444682717323303, "learning_rate": 1.5312068366679324e-05, "loss": 3.2407657623291017, "step": 29100 }, { "epoch": 0.23497977930789535, "grad_norm": 1.0314908027648926, "learning_rate": 1.531045289694111e-05, "loss": 2.887262725830078, "step": 29110 }, { "epoch": 0.2350605006336624, "grad_norm": 0.9273725748062134, "learning_rate": 1.5308837427202896e-05, "loss": 3.0737218856811523, "step": 29120 }, { "epoch": 0.23514122195942946, "grad_norm": 1.2456556558609009, "learning_rate": 1.5307221957464684e-05, "loss": 3.1168628692626954, "step": 29130 }, { "epoch": 0.23522194328519652, "grad_norm": 0.9328882694244385, "learning_rate": 1.530560648772647e-05, "loss": 3.0746049880981445, "step": 29140 }, { "epoch": 0.23530266461096358, "grad_norm": 0.7662389278411865, "learning_rate": 1.530399101798826e-05, "loss": 3.2670082092285155, "step": 29150 }, { "epoch": 0.23538338593673064, "grad_norm": 0.6350990533828735, "learning_rate": 1.5302375548250043e-05, "loss": 2.9138965606689453, "step": 29160 }, { "epoch": 0.23546410726249767, "grad_norm": 0.7479681372642517, "learning_rate": 1.530076007851183e-05, "loss": 2.680933380126953, "step": 29170 }, { "epoch": 0.23554482858826473, "grad_norm": 0.5724937319755554, "learning_rate": 1.529914460877362e-05, "loss": 3.4460758209228515, "step": 29180 }, { "epoch": 0.23562554991403178, "grad_norm": 1.2387100458145142, "learning_rate": 1.5297529139035406e-05, "loss": 3.0721004486083983, "step": 29190 }, { "epoch": 0.23570627123979884, "grad_norm": 0.5468271374702454, "learning_rate": 1.529591366929719e-05, "loss": 3.0697099685668947, "step": 29200 }, { "epoch": 0.2357869925655659, "grad_norm": 1.2205983400344849, "learning_rate": 1.529429819955898e-05, "loss": 3.206473159790039, "step": 29210 }, { "epoch": 0.23586771389133296, "grad_norm": 0.8289974331855774, "learning_rate": 1.5292682729820766e-05, "loss": 3.2614604949951174, "step": 29220 }, { "epoch": 0.23594843521710002, "grad_norm": 1.202103614807129, "learning_rate": 1.5291067260082554e-05, "loss": 3.316741180419922, "step": 29230 }, { "epoch": 0.23602915654286705, "grad_norm": 0.6249068975448608, "learning_rate": 1.5289451790344338e-05, "loss": 2.987063980102539, "step": 29240 }, { "epoch": 0.2361098778686341, "grad_norm": 0.9149074554443359, "learning_rate": 1.5287836320606126e-05, "loss": 3.0239871978759765, "step": 29250 }, { "epoch": 0.23619059919440116, "grad_norm": 0.9310307502746582, "learning_rate": 1.5286220850867913e-05, "loss": 2.8248079299926756, "step": 29260 }, { "epoch": 0.23627132052016822, "grad_norm": 0.9489787817001343, "learning_rate": 1.52846053811297e-05, "loss": 3.3600383758544923, "step": 29270 }, { "epoch": 0.23635204184593528, "grad_norm": 0.7441697120666504, "learning_rate": 1.5282989911391485e-05, "loss": 3.157908821105957, "step": 29280 }, { "epoch": 0.23643276317170234, "grad_norm": 0.9616948962211609, "learning_rate": 1.5281374441653273e-05, "loss": 3.3326168060302734, "step": 29290 }, { "epoch": 0.2365134844974694, "grad_norm": 0.6788386106491089, "learning_rate": 1.527975897191506e-05, "loss": 3.2649295806884764, "step": 29300 }, { "epoch": 0.23659420582323645, "grad_norm": 1.4695732593536377, "learning_rate": 1.527814350217685e-05, "loss": 3.261953353881836, "step": 29310 }, { "epoch": 0.23667492714900348, "grad_norm": 1.0381486415863037, "learning_rate": 1.5276528032438633e-05, "loss": 3.273516082763672, "step": 29320 }, { "epoch": 0.23675564847477054, "grad_norm": 0.8374394178390503, "learning_rate": 1.527491256270042e-05, "loss": 3.1456111907958983, "step": 29330 }, { "epoch": 0.2368363698005376, "grad_norm": 0.6265267133712769, "learning_rate": 1.5273297092962208e-05, "loss": 2.9546924591064454, "step": 29340 }, { "epoch": 0.23691709112630466, "grad_norm": 0.7842593789100647, "learning_rate": 1.5271681623223996e-05, "loss": 3.3148197174072265, "step": 29350 }, { "epoch": 0.23699781245207172, "grad_norm": 1.3362685441970825, "learning_rate": 1.527006615348578e-05, "loss": 3.272938919067383, "step": 29360 }, { "epoch": 0.23707853377783877, "grad_norm": 1.5947344303131104, "learning_rate": 1.5268450683747568e-05, "loss": 3.3174983978271486, "step": 29370 }, { "epoch": 0.23715925510360583, "grad_norm": 0.8772404193878174, "learning_rate": 1.5266835214009356e-05, "loss": 3.027594566345215, "step": 29380 }, { "epoch": 0.2372399764293729, "grad_norm": 1.145594835281372, "learning_rate": 1.5265219744271143e-05, "loss": 2.986227798461914, "step": 29390 }, { "epoch": 0.23732069775513992, "grad_norm": 1.5322431325912476, "learning_rate": 1.5263604274532928e-05, "loss": 3.0930110931396486, "step": 29400 }, { "epoch": 0.23740141908090698, "grad_norm": 1.411321997642517, "learning_rate": 1.5261988804794715e-05, "loss": 3.448046112060547, "step": 29410 }, { "epoch": 0.23748214040667404, "grad_norm": 1.1108458042144775, "learning_rate": 1.5260373335056503e-05, "loss": 3.239076614379883, "step": 29420 }, { "epoch": 0.2375628617324411, "grad_norm": 0.9651551842689514, "learning_rate": 1.525875786531829e-05, "loss": 3.205508804321289, "step": 29430 }, { "epoch": 0.23764358305820815, "grad_norm": 1.1283316612243652, "learning_rate": 1.5257142395580075e-05, "loss": 3.166094207763672, "step": 29440 }, { "epoch": 0.2377243043839752, "grad_norm": 0.9715378284454346, "learning_rate": 1.5255526925841864e-05, "loss": 3.181594657897949, "step": 29450 }, { "epoch": 0.23780502570974227, "grad_norm": 1.0540928840637207, "learning_rate": 1.5253911456103649e-05, "loss": 3.1748025894165037, "step": 29460 }, { "epoch": 0.2378857470355093, "grad_norm": 0.628686249256134, "learning_rate": 1.5252295986365438e-05, "loss": 2.997379684448242, "step": 29470 }, { "epoch": 0.23796646836127636, "grad_norm": 0.9996498227119446, "learning_rate": 1.5250680516627222e-05, "loss": 3.0082429885864257, "step": 29480 }, { "epoch": 0.23804718968704341, "grad_norm": 0.7464725375175476, "learning_rate": 1.5249065046889012e-05, "loss": 2.9019180297851563, "step": 29490 }, { "epoch": 0.23812791101281047, "grad_norm": 1.1006008386611938, "learning_rate": 1.5247449577150796e-05, "loss": 2.5611202239990236, "step": 29500 }, { "epoch": 0.23820863233857753, "grad_norm": 1.1982728242874146, "learning_rate": 1.5245834107412585e-05, "loss": 3.0547719955444337, "step": 29510 }, { "epoch": 0.2382893536643446, "grad_norm": 1.192762851715088, "learning_rate": 1.524421863767437e-05, "loss": 3.2881023406982424, "step": 29520 }, { "epoch": 0.23837007499011165, "grad_norm": 0.7501388192176819, "learning_rate": 1.524260316793616e-05, "loss": 2.942823600769043, "step": 29530 }, { "epoch": 0.2384507963158787, "grad_norm": 0.9024586081504822, "learning_rate": 1.5240987698197945e-05, "loss": 2.9108400344848633, "step": 29540 }, { "epoch": 0.23853151764164574, "grad_norm": 1.0283892154693604, "learning_rate": 1.5239372228459733e-05, "loss": 2.9696172714233398, "step": 29550 }, { "epoch": 0.2386122389674128, "grad_norm": 1.330032229423523, "learning_rate": 1.5237756758721519e-05, "loss": 2.964645576477051, "step": 29560 }, { "epoch": 0.23869296029317985, "grad_norm": 0.7359756827354431, "learning_rate": 1.5236141288983307e-05, "loss": 2.828742027282715, "step": 29570 }, { "epoch": 0.2387736816189469, "grad_norm": 1.042504072189331, "learning_rate": 1.5234525819245093e-05, "loss": 3.003463363647461, "step": 29580 }, { "epoch": 0.23885440294471397, "grad_norm": 1.020729899406433, "learning_rate": 1.523291034950688e-05, "loss": 3.1432783126831056, "step": 29590 }, { "epoch": 0.23893512427048103, "grad_norm": 0.9642510414123535, "learning_rate": 1.5231294879768666e-05, "loss": 2.986206817626953, "step": 29600 }, { "epoch": 0.23901584559624808, "grad_norm": 0.8963557481765747, "learning_rate": 1.5229679410030454e-05, "loss": 3.1026330947875977, "step": 29610 }, { "epoch": 0.2390965669220151, "grad_norm": 1.0141922235488892, "learning_rate": 1.522806394029224e-05, "loss": 3.073238182067871, "step": 29620 }, { "epoch": 0.23917728824778217, "grad_norm": 1.3297418355941772, "learning_rate": 1.5226448470554028e-05, "loss": 3.0709577560424806, "step": 29630 }, { "epoch": 0.23925800957354923, "grad_norm": 0.9681147933006287, "learning_rate": 1.5224833000815814e-05, "loss": 3.205808639526367, "step": 29640 }, { "epoch": 0.2393387308993163, "grad_norm": 1.386301875114441, "learning_rate": 1.5223217531077601e-05, "loss": 3.0358243942260743, "step": 29650 }, { "epoch": 0.23941945222508335, "grad_norm": 1.515381097793579, "learning_rate": 1.5221602061339387e-05, "loss": 2.7562789916992188, "step": 29660 }, { "epoch": 0.2395001735508504, "grad_norm": 1.2773103713989258, "learning_rate": 1.5219986591601175e-05, "loss": 3.3263362884521483, "step": 29670 }, { "epoch": 0.23958089487661746, "grad_norm": 1.2292670011520386, "learning_rate": 1.5218371121862961e-05, "loss": 3.2852252960205077, "step": 29680 }, { "epoch": 0.23966161620238452, "grad_norm": 0.613700270652771, "learning_rate": 1.5216755652124749e-05, "loss": 2.984629249572754, "step": 29690 }, { "epoch": 0.23974233752815155, "grad_norm": 0.8975734114646912, "learning_rate": 1.5215140182386535e-05, "loss": 2.918147659301758, "step": 29700 }, { "epoch": 0.2398230588539186, "grad_norm": 0.8207910060882568, "learning_rate": 1.5213524712648322e-05, "loss": 3.0548763275146484, "step": 29710 }, { "epoch": 0.23990378017968567, "grad_norm": 0.763342559337616, "learning_rate": 1.5211909242910108e-05, "loss": 3.0930580139160155, "step": 29720 }, { "epoch": 0.23998450150545272, "grad_norm": 0.6865494251251221, "learning_rate": 1.5210293773171896e-05, "loss": 2.8906581878662108, "step": 29730 }, { "epoch": 0.24006522283121978, "grad_norm": 0.7801749110221863, "learning_rate": 1.5208678303433682e-05, "loss": 3.2249591827392576, "step": 29740 }, { "epoch": 0.24014594415698684, "grad_norm": 1.2513306140899658, "learning_rate": 1.520706283369547e-05, "loss": 2.811752510070801, "step": 29750 }, { "epoch": 0.2402266654827539, "grad_norm": 0.8643468022346497, "learning_rate": 1.5205447363957256e-05, "loss": 2.7416254043579102, "step": 29760 }, { "epoch": 0.24030738680852096, "grad_norm": 0.6682621240615845, "learning_rate": 1.5203831894219043e-05, "loss": 3.3563289642333984, "step": 29770 }, { "epoch": 0.240388108134288, "grad_norm": 0.6654866933822632, "learning_rate": 1.520221642448083e-05, "loss": 2.5784582138061523, "step": 29780 }, { "epoch": 0.24046882946005504, "grad_norm": 0.8629416227340698, "learning_rate": 1.5200600954742617e-05, "loss": 3.398696517944336, "step": 29790 }, { "epoch": 0.2405495507858221, "grad_norm": 1.0770431756973267, "learning_rate": 1.5198985485004403e-05, "loss": 3.230488586425781, "step": 29800 }, { "epoch": 0.24063027211158916, "grad_norm": 0.8167995810508728, "learning_rate": 1.519737001526619e-05, "loss": 3.1520957946777344, "step": 29810 }, { "epoch": 0.24071099343735622, "grad_norm": 0.8187308311462402, "learning_rate": 1.5195754545527977e-05, "loss": 2.981144905090332, "step": 29820 }, { "epoch": 0.24079171476312328, "grad_norm": 1.0403751134872437, "learning_rate": 1.5194139075789765e-05, "loss": 3.304978942871094, "step": 29830 }, { "epoch": 0.24087243608889033, "grad_norm": 0.7945621609687805, "learning_rate": 1.519252360605155e-05, "loss": 3.513135528564453, "step": 29840 }, { "epoch": 0.24095315741465737, "grad_norm": 1.0884191989898682, "learning_rate": 1.5190908136313338e-05, "loss": 3.0186203002929686, "step": 29850 }, { "epoch": 0.24103387874042442, "grad_norm": 1.0081965923309326, "learning_rate": 1.5189292666575124e-05, "loss": 2.962862014770508, "step": 29860 }, { "epoch": 0.24111460006619148, "grad_norm": 1.105406641960144, "learning_rate": 1.5187677196836912e-05, "loss": 3.0263423919677734, "step": 29870 }, { "epoch": 0.24119532139195854, "grad_norm": 0.8107389807701111, "learning_rate": 1.5186061727098698e-05, "loss": 3.380168151855469, "step": 29880 }, { "epoch": 0.2412760427177256, "grad_norm": 0.6938613653182983, "learning_rate": 1.5184446257360486e-05, "loss": 3.2632369995117188, "step": 29890 }, { "epoch": 0.24135676404349266, "grad_norm": 1.2774410247802734, "learning_rate": 1.5182830787622272e-05, "loss": 3.4461620330810545, "step": 29900 }, { "epoch": 0.2414374853692597, "grad_norm": 1.037611961364746, "learning_rate": 1.518121531788406e-05, "loss": 2.9401079177856446, "step": 29910 }, { "epoch": 0.24151820669502677, "grad_norm": 1.073081374168396, "learning_rate": 1.5179599848145845e-05, "loss": 2.885910987854004, "step": 29920 }, { "epoch": 0.2415989280207938, "grad_norm": 1.1241178512573242, "learning_rate": 1.5177984378407633e-05, "loss": 2.7726556777954103, "step": 29930 }, { "epoch": 0.24167964934656086, "grad_norm": 0.5378992557525635, "learning_rate": 1.5176368908669419e-05, "loss": 3.0664274215698244, "step": 29940 }, { "epoch": 0.24176037067232792, "grad_norm": 1.1005624532699585, "learning_rate": 1.5174753438931207e-05, "loss": 3.1308465957641602, "step": 29950 }, { "epoch": 0.24184109199809498, "grad_norm": 0.8054332733154297, "learning_rate": 1.5173137969192993e-05, "loss": 3.202010726928711, "step": 29960 }, { "epoch": 0.24192181332386203, "grad_norm": 1.2102181911468506, "learning_rate": 1.517152249945478e-05, "loss": 2.9085247039794924, "step": 29970 }, { "epoch": 0.2420025346496291, "grad_norm": 1.0517889261245728, "learning_rate": 1.5169907029716566e-05, "loss": 2.7735870361328123, "step": 29980 }, { "epoch": 0.24208325597539615, "grad_norm": 0.841133177280426, "learning_rate": 1.5168291559978354e-05, "loss": 2.9249237060546873, "step": 29990 }, { "epoch": 0.2421639773011632, "grad_norm": 1.4457656145095825, "learning_rate": 1.516667609024014e-05, "loss": 3.770944595336914, "step": 30000 }, { "epoch": 0.24224469862693024, "grad_norm": 0.8121318817138672, "learning_rate": 1.5165060620501928e-05, "loss": 3.628006362915039, "step": 30010 }, { "epoch": 0.2423254199526973, "grad_norm": 0.6170514822006226, "learning_rate": 1.5163445150763714e-05, "loss": 2.7204524993896486, "step": 30020 }, { "epoch": 0.24240614127846435, "grad_norm": 0.779514729976654, "learning_rate": 1.5161829681025501e-05, "loss": 3.7898700714111326, "step": 30030 }, { "epoch": 0.2424868626042314, "grad_norm": 0.7431046366691589, "learning_rate": 1.5160214211287287e-05, "loss": 3.005930709838867, "step": 30040 }, { "epoch": 0.24256758392999847, "grad_norm": 1.1234922409057617, "learning_rate": 1.5158598741549075e-05, "loss": 2.9512327194213865, "step": 30050 }, { "epoch": 0.24264830525576553, "grad_norm": 0.8929536938667297, "learning_rate": 1.5156983271810861e-05, "loss": 3.165576362609863, "step": 30060 }, { "epoch": 0.2427290265815326, "grad_norm": 1.1975724697113037, "learning_rate": 1.5155367802072649e-05, "loss": 2.9246341705322267, "step": 30070 }, { "epoch": 0.24280974790729962, "grad_norm": 0.9424760937690735, "learning_rate": 1.5153752332334435e-05, "loss": 3.1953901290893554, "step": 30080 }, { "epoch": 0.24289046923306667, "grad_norm": 0.9062631726264954, "learning_rate": 1.5152136862596223e-05, "loss": 3.240372085571289, "step": 30090 }, { "epoch": 0.24297119055883373, "grad_norm": 0.9256771206855774, "learning_rate": 1.5150521392858009e-05, "loss": 2.8908199310302733, "step": 30100 }, { "epoch": 0.2430519118846008, "grad_norm": 0.6962367296218872, "learning_rate": 1.5148905923119796e-05, "loss": 2.8811120986938477, "step": 30110 }, { "epoch": 0.24313263321036785, "grad_norm": 1.1330046653747559, "learning_rate": 1.5147290453381582e-05, "loss": 3.2599407196044923, "step": 30120 }, { "epoch": 0.2432133545361349, "grad_norm": 0.8948396444320679, "learning_rate": 1.514567498364337e-05, "loss": 3.3506263732910155, "step": 30130 }, { "epoch": 0.24329407586190196, "grad_norm": 0.8800362944602966, "learning_rate": 1.5144059513905156e-05, "loss": 3.071453857421875, "step": 30140 }, { "epoch": 0.24337479718766902, "grad_norm": 0.663625180721283, "learning_rate": 1.5142444044166944e-05, "loss": 2.791765022277832, "step": 30150 }, { "epoch": 0.24345551851343605, "grad_norm": 0.8010947108268738, "learning_rate": 1.514082857442873e-05, "loss": 3.013239860534668, "step": 30160 }, { "epoch": 0.2435362398392031, "grad_norm": 0.8314634561538696, "learning_rate": 1.5139213104690517e-05, "loss": 2.9569046020507814, "step": 30170 }, { "epoch": 0.24361696116497017, "grad_norm": 0.9608925580978394, "learning_rate": 1.5137597634952303e-05, "loss": 3.1451416015625, "step": 30180 }, { "epoch": 0.24369768249073723, "grad_norm": 2.013131856918335, "learning_rate": 1.5135982165214091e-05, "loss": 3.2749874114990236, "step": 30190 }, { "epoch": 0.24377840381650429, "grad_norm": 1.1814237833023071, "learning_rate": 1.5134366695475877e-05, "loss": 3.097176933288574, "step": 30200 }, { "epoch": 0.24385912514227134, "grad_norm": 1.5378214120864868, "learning_rate": 1.5132751225737665e-05, "loss": 3.3122962951660155, "step": 30210 }, { "epoch": 0.2439398464680384, "grad_norm": 1.0706777572631836, "learning_rate": 1.513113575599945e-05, "loss": 2.6286638259887694, "step": 30220 }, { "epoch": 0.24402056779380543, "grad_norm": 1.0430560111999512, "learning_rate": 1.5129520286261238e-05, "loss": 3.106596755981445, "step": 30230 }, { "epoch": 0.2441012891195725, "grad_norm": 0.6592347025871277, "learning_rate": 1.5127904816523024e-05, "loss": 3.1229509353637694, "step": 30240 }, { "epoch": 0.24418201044533955, "grad_norm": 1.174383521080017, "learning_rate": 1.5126289346784812e-05, "loss": 3.203504180908203, "step": 30250 }, { "epoch": 0.2442627317711066, "grad_norm": 1.0573298931121826, "learning_rate": 1.5124673877046598e-05, "loss": 2.8003835678100586, "step": 30260 }, { "epoch": 0.24434345309687366, "grad_norm": 1.014724850654602, "learning_rate": 1.5123058407308386e-05, "loss": 2.916141891479492, "step": 30270 }, { "epoch": 0.24442417442264072, "grad_norm": 1.6430540084838867, "learning_rate": 1.5121442937570172e-05, "loss": 3.230947494506836, "step": 30280 }, { "epoch": 0.24450489574840778, "grad_norm": 0.7426477074623108, "learning_rate": 1.511982746783196e-05, "loss": 2.933418083190918, "step": 30290 }, { "epoch": 0.24458561707417484, "grad_norm": 1.189207911491394, "learning_rate": 1.5118211998093745e-05, "loss": 3.3915725708007813, "step": 30300 }, { "epoch": 0.24466633839994187, "grad_norm": 1.277724266052246, "learning_rate": 1.5116596528355533e-05, "loss": 2.785004997253418, "step": 30310 }, { "epoch": 0.24474705972570893, "grad_norm": 0.9682142734527588, "learning_rate": 1.5114981058617323e-05, "loss": 2.976753997802734, "step": 30320 }, { "epoch": 0.24482778105147598, "grad_norm": 0.5890793800354004, "learning_rate": 1.5113365588879107e-05, "loss": 2.933992385864258, "step": 30330 }, { "epoch": 0.24490850237724304, "grad_norm": 1.235939383506775, "learning_rate": 1.5111750119140896e-05, "loss": 3.1366024017333984, "step": 30340 }, { "epoch": 0.2449892237030101, "grad_norm": 1.431095004081726, "learning_rate": 1.511013464940268e-05, "loss": 3.4987808227539063, "step": 30350 }, { "epoch": 0.24506994502877716, "grad_norm": 0.8091149926185608, "learning_rate": 1.510851917966447e-05, "loss": 2.678995895385742, "step": 30360 }, { "epoch": 0.24515066635454422, "grad_norm": 0.9618607759475708, "learning_rate": 1.5106903709926254e-05, "loss": 3.0233659744262695, "step": 30370 }, { "epoch": 0.24523138768031127, "grad_norm": 0.7306783199310303, "learning_rate": 1.5105288240188044e-05, "loss": 2.7105133056640627, "step": 30380 }, { "epoch": 0.2453121090060783, "grad_norm": 1.0088869333267212, "learning_rate": 1.5103672770449828e-05, "loss": 3.0858999252319337, "step": 30390 }, { "epoch": 0.24539283033184536, "grad_norm": 1.0851553678512573, "learning_rate": 1.5102057300711617e-05, "loss": 3.0617685317993164, "step": 30400 }, { "epoch": 0.24547355165761242, "grad_norm": 0.9534112811088562, "learning_rate": 1.5100441830973403e-05, "loss": 3.172287940979004, "step": 30410 }, { "epoch": 0.24555427298337948, "grad_norm": 0.5760051608085632, "learning_rate": 1.5098826361235191e-05, "loss": 3.0944442749023438, "step": 30420 }, { "epoch": 0.24563499430914654, "grad_norm": 0.8855196237564087, "learning_rate": 1.5097210891496977e-05, "loss": 3.24786262512207, "step": 30430 }, { "epoch": 0.2457157156349136, "grad_norm": 1.8561303615570068, "learning_rate": 1.5095595421758765e-05, "loss": 3.187538909912109, "step": 30440 }, { "epoch": 0.24579643696068065, "grad_norm": 0.7621686458587646, "learning_rate": 1.509397995202055e-05, "loss": 3.5722816467285154, "step": 30450 }, { "epoch": 0.24587715828644768, "grad_norm": 0.8044044375419617, "learning_rate": 1.5092364482282338e-05, "loss": 2.876626396179199, "step": 30460 }, { "epoch": 0.24595787961221474, "grad_norm": 0.9622771739959717, "learning_rate": 1.5090749012544124e-05, "loss": 3.0484249114990236, "step": 30470 }, { "epoch": 0.2460386009379818, "grad_norm": 1.330501675605774, "learning_rate": 1.5089133542805912e-05, "loss": 3.1380128860473633, "step": 30480 }, { "epoch": 0.24611932226374886, "grad_norm": 0.9163832664489746, "learning_rate": 1.5087518073067698e-05, "loss": 2.7265607833862306, "step": 30490 }, { "epoch": 0.24620004358951592, "grad_norm": 1.7236144542694092, "learning_rate": 1.5085902603329486e-05, "loss": 3.308927536010742, "step": 30500 }, { "epoch": 0.24628076491528297, "grad_norm": 0.7680838704109192, "learning_rate": 1.5084287133591272e-05, "loss": 2.7736671447753904, "step": 30510 }, { "epoch": 0.24636148624105003, "grad_norm": 0.8776394724845886, "learning_rate": 1.508267166385306e-05, "loss": 3.0896087646484376, "step": 30520 }, { "epoch": 0.2464422075668171, "grad_norm": 0.9669503569602966, "learning_rate": 1.5081056194114845e-05, "loss": 3.108816146850586, "step": 30530 }, { "epoch": 0.24652292889258412, "grad_norm": 1.2171916961669922, "learning_rate": 1.5079440724376633e-05, "loss": 2.7155782699584963, "step": 30540 }, { "epoch": 0.24660365021835118, "grad_norm": 1.3306288719177246, "learning_rate": 1.5077825254638419e-05, "loss": 3.122727966308594, "step": 30550 }, { "epoch": 0.24668437154411824, "grad_norm": 0.7485578656196594, "learning_rate": 1.5076209784900207e-05, "loss": 3.0306089401245115, "step": 30560 }, { "epoch": 0.2467650928698853, "grad_norm": 0.8291028141975403, "learning_rate": 1.5074594315161993e-05, "loss": 3.0533960342407225, "step": 30570 }, { "epoch": 0.24684581419565235, "grad_norm": 0.6149272322654724, "learning_rate": 1.507297884542378e-05, "loss": 2.759465217590332, "step": 30580 }, { "epoch": 0.2469265355214194, "grad_norm": 0.9585369229316711, "learning_rate": 1.5071363375685566e-05, "loss": 2.9384265899658204, "step": 30590 }, { "epoch": 0.24700725684718647, "grad_norm": 1.090760350227356, "learning_rate": 1.5069747905947354e-05, "loss": 2.9639326095581056, "step": 30600 }, { "epoch": 0.24708797817295353, "grad_norm": 1.0996569395065308, "learning_rate": 1.506813243620914e-05, "loss": 3.1179113388061523, "step": 30610 }, { "epoch": 0.24716869949872056, "grad_norm": 1.2662409543991089, "learning_rate": 1.5066516966470928e-05, "loss": 3.2993114471435545, "step": 30620 }, { "epoch": 0.24724942082448761, "grad_norm": 0.7782710194587708, "learning_rate": 1.5064901496732714e-05, "loss": 2.9768583297729494, "step": 30630 }, { "epoch": 0.24733014215025467, "grad_norm": 1.0496232509613037, "learning_rate": 1.5063286026994502e-05, "loss": 2.567959785461426, "step": 30640 }, { "epoch": 0.24741086347602173, "grad_norm": 1.743208408355713, "learning_rate": 1.5061670557256288e-05, "loss": 3.018612861633301, "step": 30650 }, { "epoch": 0.2474915848017888, "grad_norm": 1.3586273193359375, "learning_rate": 1.5060055087518075e-05, "loss": 3.044967269897461, "step": 30660 }, { "epoch": 0.24757230612755585, "grad_norm": 1.5723073482513428, "learning_rate": 1.5058439617779861e-05, "loss": 2.7356342315673827, "step": 30670 }, { "epoch": 0.2476530274533229, "grad_norm": 0.6412823796272278, "learning_rate": 1.5056824148041649e-05, "loss": 2.7577409744262695, "step": 30680 }, { "epoch": 0.24773374877908994, "grad_norm": 1.2263798713684082, "learning_rate": 1.5055208678303435e-05, "loss": 2.765666198730469, "step": 30690 }, { "epoch": 0.247814470104857, "grad_norm": 0.7123854756355286, "learning_rate": 1.5053593208565223e-05, "loss": 2.703230285644531, "step": 30700 }, { "epoch": 0.24789519143062405, "grad_norm": 0.6214683055877686, "learning_rate": 1.5051977738827009e-05, "loss": 3.157575798034668, "step": 30710 }, { "epoch": 0.2479759127563911, "grad_norm": 1.343875765800476, "learning_rate": 1.5050362269088796e-05, "loss": 3.406215286254883, "step": 30720 }, { "epoch": 0.24805663408215817, "grad_norm": 0.9070665836334229, "learning_rate": 1.5048746799350582e-05, "loss": 3.598209762573242, "step": 30730 }, { "epoch": 0.24813735540792523, "grad_norm": 0.6529891490936279, "learning_rate": 1.504713132961237e-05, "loss": 2.808784294128418, "step": 30740 }, { "epoch": 0.24821807673369228, "grad_norm": 0.9504375457763672, "learning_rate": 1.5045515859874156e-05, "loss": 3.2618499755859376, "step": 30750 }, { "epoch": 0.24829879805945934, "grad_norm": 0.90876704454422, "learning_rate": 1.5043900390135944e-05, "loss": 3.1549051284790037, "step": 30760 }, { "epoch": 0.24837951938522637, "grad_norm": 0.953101634979248, "learning_rate": 1.504228492039773e-05, "loss": 2.728643035888672, "step": 30770 }, { "epoch": 0.24846024071099343, "grad_norm": 1.1760448217391968, "learning_rate": 1.5040669450659517e-05, "loss": 2.787195587158203, "step": 30780 }, { "epoch": 0.2485409620367605, "grad_norm": 1.2679269313812256, "learning_rate": 1.5039053980921303e-05, "loss": 3.2588619232177733, "step": 30790 }, { "epoch": 0.24862168336252755, "grad_norm": 1.262145757675171, "learning_rate": 1.5037438511183091e-05, "loss": 4.238843536376953, "step": 30800 }, { "epoch": 0.2487024046882946, "grad_norm": 0.6970077157020569, "learning_rate": 1.5035823041444877e-05, "loss": 2.939674949645996, "step": 30810 }, { "epoch": 0.24878312601406166, "grad_norm": 0.7024226188659668, "learning_rate": 1.5034207571706665e-05, "loss": 3.0627389907836915, "step": 30820 }, { "epoch": 0.24886384733982872, "grad_norm": 1.02265465259552, "learning_rate": 1.503259210196845e-05, "loss": 2.939304733276367, "step": 30830 }, { "epoch": 0.24894456866559575, "grad_norm": 1.204399824142456, "learning_rate": 1.5030976632230238e-05, "loss": 2.958497428894043, "step": 30840 }, { "epoch": 0.2490252899913628, "grad_norm": 1.143441081047058, "learning_rate": 1.5029361162492024e-05, "loss": 3.26678581237793, "step": 30850 }, { "epoch": 0.24910601131712987, "grad_norm": 1.3238856792449951, "learning_rate": 1.5027745692753812e-05, "loss": 2.657281684875488, "step": 30860 }, { "epoch": 0.24918673264289692, "grad_norm": 0.7529169917106628, "learning_rate": 1.5026130223015598e-05, "loss": 3.242193603515625, "step": 30870 }, { "epoch": 0.24926745396866398, "grad_norm": 0.9991372227668762, "learning_rate": 1.5024514753277386e-05, "loss": 3.078077507019043, "step": 30880 }, { "epoch": 0.24934817529443104, "grad_norm": 1.1541717052459717, "learning_rate": 1.5022899283539172e-05, "loss": 2.887834930419922, "step": 30890 }, { "epoch": 0.2494288966201981, "grad_norm": 1.1554272174835205, "learning_rate": 1.502128381380096e-05, "loss": 3.5056758880615235, "step": 30900 }, { "epoch": 0.24950961794596516, "grad_norm": 1.343618631362915, "learning_rate": 1.5019668344062746e-05, "loss": 2.9903154373168945, "step": 30910 }, { "epoch": 0.2495903392717322, "grad_norm": 0.9984818696975708, "learning_rate": 1.5018052874324533e-05, "loss": 3.608271026611328, "step": 30920 }, { "epoch": 0.24967106059749924, "grad_norm": 0.8329446315765381, "learning_rate": 1.501643740458632e-05, "loss": 2.9596023559570312, "step": 30930 }, { "epoch": 0.2497517819232663, "grad_norm": 0.7691078782081604, "learning_rate": 1.5014821934848107e-05, "loss": 3.2499195098876954, "step": 30940 }, { "epoch": 0.24983250324903336, "grad_norm": 1.4496926069259644, "learning_rate": 1.5013206465109893e-05, "loss": 3.0148223876953124, "step": 30950 }, { "epoch": 0.24991322457480042, "grad_norm": 1.013102650642395, "learning_rate": 1.501159099537168e-05, "loss": 3.25035400390625, "step": 30960 }, { "epoch": 0.24999394590056748, "grad_norm": 1.6452323198318481, "learning_rate": 1.5009975525633467e-05, "loss": 3.018264579772949, "step": 30970 }, { "epoch": 0.25007466722633453, "grad_norm": 0.9385982751846313, "learning_rate": 1.5008360055895254e-05, "loss": 3.346119689941406, "step": 30980 }, { "epoch": 0.25015538855210157, "grad_norm": 1.5225741863250732, "learning_rate": 1.500674458615704e-05, "loss": 3.397267150878906, "step": 30990 }, { "epoch": 0.25023610987786865, "grad_norm": 1.0411700010299683, "learning_rate": 1.5005129116418828e-05, "loss": 3.400503921508789, "step": 31000 }, { "epoch": 0.2503168312036357, "grad_norm": 0.7476258277893066, "learning_rate": 1.5003513646680614e-05, "loss": 3.1132556915283205, "step": 31010 }, { "epoch": 0.25039755252940277, "grad_norm": 1.193116307258606, "learning_rate": 1.5001898176942402e-05, "loss": 3.097393798828125, "step": 31020 }, { "epoch": 0.2504782738551698, "grad_norm": 1.2226121425628662, "learning_rate": 1.5000282707204188e-05, "loss": 3.0178558349609377, "step": 31030 }, { "epoch": 0.2505589951809368, "grad_norm": 0.8968333601951599, "learning_rate": 1.4998667237465975e-05, "loss": 3.4443187713623047, "step": 31040 }, { "epoch": 0.2506397165067039, "grad_norm": 0.7837483286857605, "learning_rate": 1.4997051767727761e-05, "loss": 2.7739450454711916, "step": 31050 }, { "epoch": 0.25072043783247094, "grad_norm": 1.0200673341751099, "learning_rate": 1.4995436297989549e-05, "loss": 2.970115089416504, "step": 31060 }, { "epoch": 0.25080115915823803, "grad_norm": 1.4240761995315552, "learning_rate": 1.4993820828251335e-05, "loss": 3.124846649169922, "step": 31070 }, { "epoch": 0.25088188048400506, "grad_norm": 0.9263231158256531, "learning_rate": 1.4992205358513123e-05, "loss": 3.7704776763916015, "step": 31080 }, { "epoch": 0.25096260180977215, "grad_norm": 0.6730988621711731, "learning_rate": 1.4990589888774909e-05, "loss": 3.1359878540039063, "step": 31090 }, { "epoch": 0.2510433231355392, "grad_norm": 1.1335790157318115, "learning_rate": 1.4988974419036696e-05, "loss": 3.1775815963745115, "step": 31100 }, { "epoch": 0.2511240444613062, "grad_norm": 0.7706882953643799, "learning_rate": 1.4987358949298482e-05, "loss": 2.7446109771728517, "step": 31110 }, { "epoch": 0.2512047657870733, "grad_norm": 1.1171355247497559, "learning_rate": 1.498574347956027e-05, "loss": 3.189930534362793, "step": 31120 }, { "epoch": 0.2512854871128403, "grad_norm": 0.6143569946289062, "learning_rate": 1.4984128009822056e-05, "loss": 2.9240013122558595, "step": 31130 }, { "epoch": 0.2513662084386074, "grad_norm": 0.90478515625, "learning_rate": 1.4982512540083844e-05, "loss": 3.313999557495117, "step": 31140 }, { "epoch": 0.25144692976437444, "grad_norm": 1.515514850616455, "learning_rate": 1.498089707034563e-05, "loss": 3.0607690811157227, "step": 31150 }, { "epoch": 0.2515276510901415, "grad_norm": 0.9776345491409302, "learning_rate": 1.4979281600607418e-05, "loss": 3.505257797241211, "step": 31160 }, { "epoch": 0.25160837241590855, "grad_norm": 0.812153160572052, "learning_rate": 1.4977666130869204e-05, "loss": 3.0947549819946287, "step": 31170 }, { "epoch": 0.25168909374167564, "grad_norm": 0.9327637553215027, "learning_rate": 1.4976050661130991e-05, "loss": 3.3281864166259765, "step": 31180 }, { "epoch": 0.25176981506744267, "grad_norm": 1.2076611518859863, "learning_rate": 1.4974435191392777e-05, "loss": 3.5736339569091795, "step": 31190 }, { "epoch": 0.2518505363932097, "grad_norm": 1.227927565574646, "learning_rate": 1.4972819721654565e-05, "loss": 3.111484146118164, "step": 31200 }, { "epoch": 0.2519312577189768, "grad_norm": 0.7868800163269043, "learning_rate": 1.4971204251916351e-05, "loss": 3.1182043075561525, "step": 31210 }, { "epoch": 0.2520119790447438, "grad_norm": 0.804667592048645, "learning_rate": 1.4969588782178139e-05, "loss": 3.2065311431884767, "step": 31220 }, { "epoch": 0.2520927003705109, "grad_norm": 0.6525206565856934, "learning_rate": 1.4967973312439925e-05, "loss": 3.28070068359375, "step": 31230 }, { "epoch": 0.25217342169627793, "grad_norm": 0.9888599514961243, "learning_rate": 1.4966357842701712e-05, "loss": 3.2600326538085938, "step": 31240 }, { "epoch": 0.252254143022045, "grad_norm": 0.7574079632759094, "learning_rate": 1.4964742372963498e-05, "loss": 2.8127927780151367, "step": 31250 }, { "epoch": 0.25233486434781205, "grad_norm": 0.8210048079490662, "learning_rate": 1.4963126903225286e-05, "loss": 2.7973211288452147, "step": 31260 }, { "epoch": 0.2524155856735791, "grad_norm": 0.7294892072677612, "learning_rate": 1.4961511433487072e-05, "loss": 3.2716819763183596, "step": 31270 }, { "epoch": 0.25249630699934617, "grad_norm": 0.8061504364013672, "learning_rate": 1.4959895963748861e-05, "loss": 2.9926820755004884, "step": 31280 }, { "epoch": 0.2525770283251132, "grad_norm": 1.0773332118988037, "learning_rate": 1.4958280494010646e-05, "loss": 3.2399410247802733, "step": 31290 }, { "epoch": 0.2526577496508803, "grad_norm": 0.7690991163253784, "learning_rate": 1.4956665024272435e-05, "loss": 3.5671993255615235, "step": 31300 }, { "epoch": 0.2527384709766473, "grad_norm": 1.1333210468292236, "learning_rate": 1.495504955453422e-05, "loss": 3.095210838317871, "step": 31310 }, { "epoch": 0.2528191923024144, "grad_norm": 1.3094098567962646, "learning_rate": 1.4953434084796009e-05, "loss": 3.2781291961669923, "step": 31320 }, { "epoch": 0.2528999136281814, "grad_norm": 0.6109998226165771, "learning_rate": 1.4951818615057793e-05, "loss": 2.836958312988281, "step": 31330 }, { "epoch": 0.25298063495394846, "grad_norm": 1.2244179248809814, "learning_rate": 1.4950203145319582e-05, "loss": 3.0551387786865236, "step": 31340 }, { "epoch": 0.25306135627971554, "grad_norm": 0.9561655521392822, "learning_rate": 1.4948587675581367e-05, "loss": 3.1713274002075194, "step": 31350 }, { "epoch": 0.2531420776054826, "grad_norm": 0.535808801651001, "learning_rate": 1.4946972205843156e-05, "loss": 3.573223114013672, "step": 31360 }, { "epoch": 0.25322279893124966, "grad_norm": 0.7292167544364929, "learning_rate": 1.494535673610494e-05, "loss": 3.180963325500488, "step": 31370 }, { "epoch": 0.2533035202570167, "grad_norm": 1.0113708972930908, "learning_rate": 1.494374126636673e-05, "loss": 2.6995304107666014, "step": 31380 }, { "epoch": 0.2533842415827838, "grad_norm": 0.9102395176887512, "learning_rate": 1.4942125796628514e-05, "loss": 3.037608528137207, "step": 31390 }, { "epoch": 0.2534649629085508, "grad_norm": 0.9526005983352661, "learning_rate": 1.4940510326890304e-05, "loss": 2.783111572265625, "step": 31400 }, { "epoch": 0.2535456842343179, "grad_norm": 0.6972147226333618, "learning_rate": 1.4938894857152088e-05, "loss": 3.4513500213623045, "step": 31410 }, { "epoch": 0.2536264055600849, "grad_norm": 1.093445062637329, "learning_rate": 1.4937279387413877e-05, "loss": 3.1971456527709963, "step": 31420 }, { "epoch": 0.25370712688585195, "grad_norm": 0.6183561682701111, "learning_rate": 1.4935663917675662e-05, "loss": 3.0194272994995117, "step": 31430 }, { "epoch": 0.25378784821161904, "grad_norm": 0.8942025303840637, "learning_rate": 1.4934048447937451e-05, "loss": 3.085980033874512, "step": 31440 }, { "epoch": 0.25386856953738607, "grad_norm": 0.639334499835968, "learning_rate": 1.4932432978199237e-05, "loss": 2.807545852661133, "step": 31450 }, { "epoch": 0.25394929086315315, "grad_norm": 0.7016006708145142, "learning_rate": 1.4930817508461025e-05, "loss": 3.219452667236328, "step": 31460 }, { "epoch": 0.2540300121889202, "grad_norm": 0.9833419919013977, "learning_rate": 1.492920203872281e-05, "loss": 2.863842010498047, "step": 31470 }, { "epoch": 0.25411073351468727, "grad_norm": 0.8813968300819397, "learning_rate": 1.4927586568984598e-05, "loss": 3.127447319030762, "step": 31480 }, { "epoch": 0.2541914548404543, "grad_norm": 1.1372196674346924, "learning_rate": 1.4925971099246384e-05, "loss": 2.6451274871826174, "step": 31490 }, { "epoch": 0.25427217616622133, "grad_norm": 0.8602254390716553, "learning_rate": 1.4924355629508172e-05, "loss": 3.4541484832763674, "step": 31500 }, { "epoch": 0.2543528974919884, "grad_norm": 1.2180362939834595, "learning_rate": 1.4922740159769958e-05, "loss": 2.709330368041992, "step": 31510 }, { "epoch": 0.25443361881775545, "grad_norm": 0.8790220618247986, "learning_rate": 1.4921124690031746e-05, "loss": 3.1897262573242187, "step": 31520 }, { "epoch": 0.25451434014352253, "grad_norm": 0.9354549646377563, "learning_rate": 1.4919509220293532e-05, "loss": 2.9708026885986327, "step": 31530 }, { "epoch": 0.25459506146928956, "grad_norm": 1.0247700214385986, "learning_rate": 1.491789375055532e-05, "loss": 3.032522201538086, "step": 31540 }, { "epoch": 0.25467578279505665, "grad_norm": 0.7811582684516907, "learning_rate": 1.4916278280817105e-05, "loss": 2.9372467041015624, "step": 31550 }, { "epoch": 0.2547565041208237, "grad_norm": 1.0822703838348389, "learning_rate": 1.4914662811078893e-05, "loss": 2.8817584991455076, "step": 31560 }, { "epoch": 0.2548372254465907, "grad_norm": 1.2969510555267334, "learning_rate": 1.491304734134068e-05, "loss": 2.925254249572754, "step": 31570 }, { "epoch": 0.2549179467723578, "grad_norm": 0.9884865880012512, "learning_rate": 1.4911431871602467e-05, "loss": 3.0059326171875, "step": 31580 }, { "epoch": 0.2549986680981248, "grad_norm": 0.8987166285514832, "learning_rate": 1.4909816401864254e-05, "loss": 3.215787887573242, "step": 31590 }, { "epoch": 0.2550793894238919, "grad_norm": 0.6933796405792236, "learning_rate": 1.490820093212604e-05, "loss": 3.0230686187744142, "step": 31600 }, { "epoch": 0.25516011074965894, "grad_norm": 0.6962625980377197, "learning_rate": 1.4906585462387828e-05, "loss": 3.069357490539551, "step": 31610 }, { "epoch": 0.255240832075426, "grad_norm": 0.9488569498062134, "learning_rate": 1.4904969992649614e-05, "loss": 2.934203338623047, "step": 31620 }, { "epoch": 0.25532155340119306, "grad_norm": 0.6757266521453857, "learning_rate": 1.4903354522911402e-05, "loss": 2.9367483139038084, "step": 31630 }, { "epoch": 0.2554022747269601, "grad_norm": 1.3474119901657104, "learning_rate": 1.4901739053173188e-05, "loss": 3.5055221557617187, "step": 31640 }, { "epoch": 0.2554829960527272, "grad_norm": 1.157685399055481, "learning_rate": 1.4900123583434976e-05, "loss": 3.0787357330322265, "step": 31650 }, { "epoch": 0.2555637173784942, "grad_norm": 1.0822539329528809, "learning_rate": 1.4898508113696762e-05, "loss": 3.221058654785156, "step": 31660 }, { "epoch": 0.2556444387042613, "grad_norm": 1.0543043613433838, "learning_rate": 1.489689264395855e-05, "loss": 3.0875377655029297, "step": 31670 }, { "epoch": 0.2557251600300283, "grad_norm": 1.4012633562088013, "learning_rate": 1.4895277174220335e-05, "loss": 2.968450927734375, "step": 31680 }, { "epoch": 0.2558058813557954, "grad_norm": 1.0730928182601929, "learning_rate": 1.4893661704482123e-05, "loss": 3.137886619567871, "step": 31690 }, { "epoch": 0.25588660268156244, "grad_norm": 0.7546489834785461, "learning_rate": 1.4892046234743909e-05, "loss": 2.828079605102539, "step": 31700 }, { "epoch": 0.2559673240073295, "grad_norm": 0.9881263971328735, "learning_rate": 1.4890430765005697e-05, "loss": 2.893581771850586, "step": 31710 }, { "epoch": 0.25604804533309655, "grad_norm": 1.175313115119934, "learning_rate": 1.4888815295267483e-05, "loss": 2.9343347549438477, "step": 31720 }, { "epoch": 0.2561287666588636, "grad_norm": 0.7863474488258362, "learning_rate": 1.488719982552927e-05, "loss": 3.6101608276367188, "step": 31730 }, { "epoch": 0.25620948798463067, "grad_norm": 1.1737120151519775, "learning_rate": 1.4885584355791056e-05, "loss": 2.980678176879883, "step": 31740 }, { "epoch": 0.2562902093103977, "grad_norm": 1.2419060468673706, "learning_rate": 1.4883968886052844e-05, "loss": 2.8511636734008787, "step": 31750 }, { "epoch": 0.2563709306361648, "grad_norm": 1.8786791563034058, "learning_rate": 1.488235341631463e-05, "loss": 3.1072277069091796, "step": 31760 }, { "epoch": 0.2564516519619318, "grad_norm": 1.0817910432815552, "learning_rate": 1.4880737946576418e-05, "loss": 3.436686706542969, "step": 31770 }, { "epoch": 0.2565323732876989, "grad_norm": 0.8955574035644531, "learning_rate": 1.4879122476838204e-05, "loss": 3.014764404296875, "step": 31780 }, { "epoch": 0.25661309461346593, "grad_norm": 1.0009890794754028, "learning_rate": 1.4877507007099991e-05, "loss": 3.0703197479248048, "step": 31790 }, { "epoch": 0.25669381593923296, "grad_norm": 1.3135868310928345, "learning_rate": 1.4875891537361777e-05, "loss": 3.0518184661865235, "step": 31800 }, { "epoch": 0.25677453726500005, "grad_norm": 0.8283389806747437, "learning_rate": 1.4874276067623565e-05, "loss": 2.9787342071533205, "step": 31810 }, { "epoch": 0.2568552585907671, "grad_norm": 1.3527253866195679, "learning_rate": 1.4872660597885351e-05, "loss": 2.779331588745117, "step": 31820 }, { "epoch": 0.25693597991653416, "grad_norm": 0.6850487589836121, "learning_rate": 1.4871045128147139e-05, "loss": 3.000136375427246, "step": 31830 }, { "epoch": 0.2570167012423012, "grad_norm": 1.035703420639038, "learning_rate": 1.4869429658408925e-05, "loss": 2.9814630508422852, "step": 31840 }, { "epoch": 0.2570974225680683, "grad_norm": 1.2814358472824097, "learning_rate": 1.4867814188670712e-05, "loss": 3.1860687255859377, "step": 31850 }, { "epoch": 0.2571781438938353, "grad_norm": 1.0375581979751587, "learning_rate": 1.4866198718932498e-05, "loss": 3.1666669845581055, "step": 31860 }, { "epoch": 0.25725886521960234, "grad_norm": 0.684887170791626, "learning_rate": 1.4864583249194286e-05, "loss": 3.067955207824707, "step": 31870 }, { "epoch": 0.2573395865453694, "grad_norm": 1.46497642993927, "learning_rate": 1.4862967779456072e-05, "loss": 3.2156646728515623, "step": 31880 }, { "epoch": 0.25742030787113646, "grad_norm": 1.0864235162734985, "learning_rate": 1.486135230971786e-05, "loss": 2.671284294128418, "step": 31890 }, { "epoch": 0.25750102919690354, "grad_norm": 0.8851853013038635, "learning_rate": 1.4859736839979646e-05, "loss": 3.655725860595703, "step": 31900 }, { "epoch": 0.25758175052267057, "grad_norm": 1.1698527336120605, "learning_rate": 1.4858121370241434e-05, "loss": 3.223903274536133, "step": 31910 }, { "epoch": 0.25766247184843766, "grad_norm": 1.008304476737976, "learning_rate": 1.485650590050322e-05, "loss": 3.0275888442993164, "step": 31920 }, { "epoch": 0.2577431931742047, "grad_norm": 1.1235891580581665, "learning_rate": 1.4854890430765007e-05, "loss": 2.8010784149169923, "step": 31930 }, { "epoch": 0.2578239144999718, "grad_norm": 0.9006630778312683, "learning_rate": 1.4853274961026793e-05, "loss": 3.199581527709961, "step": 31940 }, { "epoch": 0.2579046358257388, "grad_norm": 0.8434034585952759, "learning_rate": 1.4851659491288581e-05, "loss": 2.851248550415039, "step": 31950 }, { "epoch": 0.25798535715150583, "grad_norm": 1.0410693883895874, "learning_rate": 1.4850044021550367e-05, "loss": 3.3528675079345702, "step": 31960 }, { "epoch": 0.2580660784772729, "grad_norm": 0.9486970901489258, "learning_rate": 1.4848428551812155e-05, "loss": 3.7084808349609375, "step": 31970 }, { "epoch": 0.25814679980303995, "grad_norm": 1.0048387050628662, "learning_rate": 1.484681308207394e-05, "loss": 2.828958511352539, "step": 31980 }, { "epoch": 0.25822752112880704, "grad_norm": 0.6874575018882751, "learning_rate": 1.4845197612335728e-05, "loss": 2.9212820053100588, "step": 31990 }, { "epoch": 0.25830824245457407, "grad_norm": 0.9379770755767822, "learning_rate": 1.4843582142597514e-05, "loss": 3.203390121459961, "step": 32000 }, { "epoch": 0.25838896378034115, "grad_norm": 0.8862240314483643, "learning_rate": 1.4841966672859302e-05, "loss": 3.1973699569702148, "step": 32010 }, { "epoch": 0.2584696851061082, "grad_norm": 0.6402549743652344, "learning_rate": 1.4840351203121088e-05, "loss": 3.755308151245117, "step": 32020 }, { "epoch": 0.2585504064318752, "grad_norm": 0.9100818634033203, "learning_rate": 1.4838735733382876e-05, "loss": 3.139652061462402, "step": 32030 }, { "epoch": 0.2586311277576423, "grad_norm": 1.0541396141052246, "learning_rate": 1.4837120263644662e-05, "loss": 3.453447723388672, "step": 32040 }, { "epoch": 0.25871184908340933, "grad_norm": 0.9600138664245605, "learning_rate": 1.483550479390645e-05, "loss": 3.163119888305664, "step": 32050 }, { "epoch": 0.2587925704091764, "grad_norm": 1.59857177734375, "learning_rate": 1.4833889324168235e-05, "loss": 3.07336311340332, "step": 32060 }, { "epoch": 0.25887329173494344, "grad_norm": 1.5072392225265503, "learning_rate": 1.4832273854430023e-05, "loss": 3.2711231231689455, "step": 32070 }, { "epoch": 0.25895401306071053, "grad_norm": 1.1606831550598145, "learning_rate": 1.4830658384691809e-05, "loss": 2.9389728546142577, "step": 32080 }, { "epoch": 0.25903473438647756, "grad_norm": 1.135981798171997, "learning_rate": 1.4829042914953597e-05, "loss": 3.081490135192871, "step": 32090 }, { "epoch": 0.2591154557122446, "grad_norm": 1.0037966966629028, "learning_rate": 1.4827427445215383e-05, "loss": 3.174420166015625, "step": 32100 }, { "epoch": 0.2591961770380117, "grad_norm": 1.016163945198059, "learning_rate": 1.482581197547717e-05, "loss": 3.459614944458008, "step": 32110 }, { "epoch": 0.2592768983637787, "grad_norm": 1.1375283002853394, "learning_rate": 1.4824196505738956e-05, "loss": 3.2189998626708984, "step": 32120 }, { "epoch": 0.2593576196895458, "grad_norm": 0.8318527936935425, "learning_rate": 1.4822581036000744e-05, "loss": 3.059775543212891, "step": 32130 }, { "epoch": 0.2594383410153128, "grad_norm": 1.3054327964782715, "learning_rate": 1.482096556626253e-05, "loss": 3.1032114028930664, "step": 32140 }, { "epoch": 0.2595190623410799, "grad_norm": 0.9508140087127686, "learning_rate": 1.481935009652432e-05, "loss": 3.8472366333007812, "step": 32150 }, { "epoch": 0.25959978366684694, "grad_norm": 0.9533781409263611, "learning_rate": 1.4817734626786104e-05, "loss": 3.289958953857422, "step": 32160 }, { "epoch": 0.259680504992614, "grad_norm": 1.6333919763565063, "learning_rate": 1.4816119157047893e-05, "loss": 3.3095645904541016, "step": 32170 }, { "epoch": 0.25976122631838106, "grad_norm": 0.9207125306129456, "learning_rate": 1.4814503687309677e-05, "loss": 3.6269454956054688, "step": 32180 }, { "epoch": 0.2598419476441481, "grad_norm": 1.3349541425704956, "learning_rate": 1.4812888217571467e-05, "loss": 2.8660348892211913, "step": 32190 }, { "epoch": 0.25992266896991517, "grad_norm": 1.4066028594970703, "learning_rate": 1.4811272747833251e-05, "loss": 2.8441381454467773, "step": 32200 }, { "epoch": 0.2600033902956822, "grad_norm": 1.1195520162582397, "learning_rate": 1.480965727809504e-05, "loss": 2.9149951934814453, "step": 32210 }, { "epoch": 0.2600841116214493, "grad_norm": 1.2186025381088257, "learning_rate": 1.4808041808356825e-05, "loss": 2.911486053466797, "step": 32220 }, { "epoch": 0.2601648329472163, "grad_norm": 1.0956164598464966, "learning_rate": 1.4806426338618614e-05, "loss": 3.2326309204101564, "step": 32230 }, { "epoch": 0.2602455542729834, "grad_norm": 0.5368409752845764, "learning_rate": 1.4804810868880399e-05, "loss": 3.4079097747802733, "step": 32240 }, { "epoch": 0.26032627559875043, "grad_norm": 1.0956811904907227, "learning_rate": 1.4803195399142188e-05, "loss": 3.6743167877197265, "step": 32250 }, { "epoch": 0.26040699692451746, "grad_norm": 1.4582182168960571, "learning_rate": 1.4801579929403972e-05, "loss": 2.668178939819336, "step": 32260 }, { "epoch": 0.26048771825028455, "grad_norm": 1.2693970203399658, "learning_rate": 1.4799964459665762e-05, "loss": 2.7830528259277343, "step": 32270 }, { "epoch": 0.2605684395760516, "grad_norm": 1.2603737115859985, "learning_rate": 1.4798348989927546e-05, "loss": 3.6601547241210937, "step": 32280 }, { "epoch": 0.26064916090181867, "grad_norm": 0.72533118724823, "learning_rate": 1.4796733520189335e-05, "loss": 2.898652267456055, "step": 32290 }, { "epoch": 0.2607298822275857, "grad_norm": 0.865257978439331, "learning_rate": 1.479511805045112e-05, "loss": 2.995822525024414, "step": 32300 }, { "epoch": 0.2608106035533528, "grad_norm": 0.8113510608673096, "learning_rate": 1.4793502580712909e-05, "loss": 2.955408477783203, "step": 32310 }, { "epoch": 0.2608913248791198, "grad_norm": 1.835212230682373, "learning_rate": 1.4791887110974695e-05, "loss": 2.9086517333984374, "step": 32320 }, { "epoch": 0.26097204620488684, "grad_norm": 1.0918337106704712, "learning_rate": 1.4790271641236483e-05, "loss": 3.363056945800781, "step": 32330 }, { "epoch": 0.26105276753065393, "grad_norm": 0.8565508723258972, "learning_rate": 1.4788656171498269e-05, "loss": 2.8614025115966797, "step": 32340 }, { "epoch": 0.26113348885642096, "grad_norm": 1.3503004312515259, "learning_rate": 1.4787040701760056e-05, "loss": 2.9549066543579103, "step": 32350 }, { "epoch": 0.26121421018218804, "grad_norm": 0.6620328426361084, "learning_rate": 1.4785425232021842e-05, "loss": 3.378204345703125, "step": 32360 }, { "epoch": 0.2612949315079551, "grad_norm": 0.8144506812095642, "learning_rate": 1.478380976228363e-05, "loss": 3.1027236938476563, "step": 32370 }, { "epoch": 0.26137565283372216, "grad_norm": 0.7262131571769714, "learning_rate": 1.4782194292545416e-05, "loss": 2.8817922592163088, "step": 32380 }, { "epoch": 0.2614563741594892, "grad_norm": 0.7548544406890869, "learning_rate": 1.4780578822807204e-05, "loss": 2.8853103637695314, "step": 32390 }, { "epoch": 0.2615370954852563, "grad_norm": 1.394696831703186, "learning_rate": 1.477896335306899e-05, "loss": 2.7808008193969727, "step": 32400 }, { "epoch": 0.2616178168110233, "grad_norm": 1.0230381488800049, "learning_rate": 1.4777347883330777e-05, "loss": 2.980661392211914, "step": 32410 }, { "epoch": 0.26169853813679034, "grad_norm": 0.7660446763038635, "learning_rate": 1.4775732413592563e-05, "loss": 3.413407897949219, "step": 32420 }, { "epoch": 0.2617792594625574, "grad_norm": 0.7972636222839355, "learning_rate": 1.4774116943854351e-05, "loss": 2.7821304321289064, "step": 32430 }, { "epoch": 0.26185998078832445, "grad_norm": 0.7640714049339294, "learning_rate": 1.4772501474116137e-05, "loss": 2.590493392944336, "step": 32440 }, { "epoch": 0.26194070211409154, "grad_norm": 1.1176127195358276, "learning_rate": 1.4770886004377925e-05, "loss": 2.9997949600219727, "step": 32450 }, { "epoch": 0.26202142343985857, "grad_norm": 0.6768150329589844, "learning_rate": 1.4769270534639711e-05, "loss": 3.274081802368164, "step": 32460 }, { "epoch": 0.26210214476562566, "grad_norm": 0.826118528842926, "learning_rate": 1.4767655064901499e-05, "loss": 3.4176872253417967, "step": 32470 }, { "epoch": 0.2621828660913927, "grad_norm": 0.7959719300270081, "learning_rate": 1.4766039595163285e-05, "loss": 2.8511512756347654, "step": 32480 }, { "epoch": 0.2622635874171597, "grad_norm": 0.7675473690032959, "learning_rate": 1.4764424125425072e-05, "loss": 3.4887386322021485, "step": 32490 }, { "epoch": 0.2623443087429268, "grad_norm": 0.6441812515258789, "learning_rate": 1.4762808655686858e-05, "loss": 2.8178815841674805, "step": 32500 }, { "epoch": 0.26242503006869383, "grad_norm": 1.7508941888809204, "learning_rate": 1.4761193185948646e-05, "loss": 3.133635330200195, "step": 32510 }, { "epoch": 0.2625057513944609, "grad_norm": 0.8234955668449402, "learning_rate": 1.4759577716210432e-05, "loss": 3.475167465209961, "step": 32520 }, { "epoch": 0.26258647272022795, "grad_norm": 3.2248854637145996, "learning_rate": 1.475796224647222e-05, "loss": 3.9193580627441404, "step": 32530 }, { "epoch": 0.26266719404599503, "grad_norm": 0.9506625533103943, "learning_rate": 1.4756346776734006e-05, "loss": 2.779581069946289, "step": 32540 }, { "epoch": 0.26274791537176206, "grad_norm": 4.6602959632873535, "learning_rate": 1.4754731306995793e-05, "loss": 3.7073696136474608, "step": 32550 }, { "epoch": 0.2628286366975291, "grad_norm": 0.8242933750152588, "learning_rate": 1.475311583725758e-05, "loss": 3.364398956298828, "step": 32560 }, { "epoch": 0.2629093580232962, "grad_norm": 1.2883590459823608, "learning_rate": 1.4751500367519367e-05, "loss": 2.7839372634887694, "step": 32570 }, { "epoch": 0.2629900793490632, "grad_norm": 1.1389505863189697, "learning_rate": 1.4749884897781153e-05, "loss": 2.873274230957031, "step": 32580 }, { "epoch": 0.2630708006748303, "grad_norm": 0.994869589805603, "learning_rate": 1.474826942804294e-05, "loss": 3.706743621826172, "step": 32590 }, { "epoch": 0.2631515220005973, "grad_norm": 0.7150609493255615, "learning_rate": 1.4746653958304727e-05, "loss": 2.8694210052490234, "step": 32600 }, { "epoch": 0.2632322433263644, "grad_norm": 1.129193663597107, "learning_rate": 1.4745038488566514e-05, "loss": 3.0519664764404295, "step": 32610 }, { "epoch": 0.26331296465213144, "grad_norm": 0.6470587849617004, "learning_rate": 1.47434230188283e-05, "loss": 2.8098386764526366, "step": 32620 }, { "epoch": 0.26339368597789853, "grad_norm": 0.7748976945877075, "learning_rate": 1.4741807549090088e-05, "loss": 3.0307870864868165, "step": 32630 }, { "epoch": 0.26347440730366556, "grad_norm": 1.6379551887512207, "learning_rate": 1.4740192079351874e-05, "loss": 2.9270999908447264, "step": 32640 }, { "epoch": 0.2635551286294326, "grad_norm": 1.0127464532852173, "learning_rate": 1.4738576609613662e-05, "loss": 2.65736083984375, "step": 32650 }, { "epoch": 0.2636358499551997, "grad_norm": 0.7842034697532654, "learning_rate": 1.4736961139875448e-05, "loss": 3.221063995361328, "step": 32660 }, { "epoch": 0.2637165712809667, "grad_norm": 1.1193767786026, "learning_rate": 1.4735345670137235e-05, "loss": 3.1494623184204102, "step": 32670 }, { "epoch": 0.2637972926067338, "grad_norm": 0.6716192364692688, "learning_rate": 1.4733730200399021e-05, "loss": 2.7439836502075194, "step": 32680 }, { "epoch": 0.2638780139325008, "grad_norm": 1.3297486305236816, "learning_rate": 1.473211473066081e-05, "loss": 2.7857440948486327, "step": 32690 }, { "epoch": 0.2639587352582679, "grad_norm": 0.7047812342643738, "learning_rate": 1.4730499260922595e-05, "loss": 3.4560791015625, "step": 32700 }, { "epoch": 0.26403945658403494, "grad_norm": 0.9538123607635498, "learning_rate": 1.4728883791184383e-05, "loss": 4.360540390014648, "step": 32710 }, { "epoch": 0.26412017790980197, "grad_norm": 1.3635045289993286, "learning_rate": 1.4727268321446169e-05, "loss": 3.1326440811157226, "step": 32720 }, { "epoch": 0.26420089923556905, "grad_norm": 2.106318712234497, "learning_rate": 1.4725652851707957e-05, "loss": 3.163736343383789, "step": 32730 }, { "epoch": 0.2642816205613361, "grad_norm": 1.1882617473602295, "learning_rate": 1.4724037381969743e-05, "loss": 3.2021732330322266, "step": 32740 }, { "epoch": 0.26436234188710317, "grad_norm": 1.0616164207458496, "learning_rate": 1.472242191223153e-05, "loss": 3.004823112487793, "step": 32750 }, { "epoch": 0.2644430632128702, "grad_norm": 1.572638750076294, "learning_rate": 1.4720806442493316e-05, "loss": 2.8619112014770507, "step": 32760 }, { "epoch": 0.2645237845386373, "grad_norm": 0.9641366600990295, "learning_rate": 1.4719190972755104e-05, "loss": 3.666116714477539, "step": 32770 }, { "epoch": 0.2646045058644043, "grad_norm": 2.394831657409668, "learning_rate": 1.471757550301689e-05, "loss": 3.1092544555664063, "step": 32780 }, { "epoch": 0.26468522719017135, "grad_norm": 0.9082217812538147, "learning_rate": 1.4715960033278678e-05, "loss": 2.738056755065918, "step": 32790 }, { "epoch": 0.26476594851593843, "grad_norm": 1.1413750648498535, "learning_rate": 1.4714344563540464e-05, "loss": 3.168403434753418, "step": 32800 }, { "epoch": 0.26484666984170546, "grad_norm": 0.6670842170715332, "learning_rate": 1.4712729093802251e-05, "loss": 2.9378841400146483, "step": 32810 }, { "epoch": 0.26492739116747255, "grad_norm": 0.7947919964790344, "learning_rate": 1.4711113624064039e-05, "loss": 3.3892597198486327, "step": 32820 }, { "epoch": 0.2650081124932396, "grad_norm": 1.1964664459228516, "learning_rate": 1.4709498154325825e-05, "loss": 3.108588981628418, "step": 32830 }, { "epoch": 0.26508883381900666, "grad_norm": 0.7795996069908142, "learning_rate": 1.4707882684587613e-05, "loss": 3.9499385833740233, "step": 32840 }, { "epoch": 0.2651695551447737, "grad_norm": 0.6707693338394165, "learning_rate": 1.4706267214849399e-05, "loss": 2.8381980895996093, "step": 32850 }, { "epoch": 0.2652502764705407, "grad_norm": 1.9474259614944458, "learning_rate": 1.4704651745111186e-05, "loss": 3.5062030792236327, "step": 32860 }, { "epoch": 0.2653309977963078, "grad_norm": 1.1809695959091187, "learning_rate": 1.4703036275372972e-05, "loss": 3.082062339782715, "step": 32870 }, { "epoch": 0.26541171912207484, "grad_norm": 0.8333737254142761, "learning_rate": 1.470142080563476e-05, "loss": 2.422878456115723, "step": 32880 }, { "epoch": 0.2654924404478419, "grad_norm": 0.8591476678848267, "learning_rate": 1.4699805335896546e-05, "loss": 3.1788305282592773, "step": 32890 }, { "epoch": 0.26557316177360896, "grad_norm": 1.1097067594528198, "learning_rate": 1.4698189866158334e-05, "loss": 3.309535598754883, "step": 32900 }, { "epoch": 0.26565388309937604, "grad_norm": 0.9746884703636169, "learning_rate": 1.469657439642012e-05, "loss": 3.1548650741577147, "step": 32910 }, { "epoch": 0.2657346044251431, "grad_norm": 1.1214107275009155, "learning_rate": 1.4694958926681907e-05, "loss": 3.1042863845825197, "step": 32920 }, { "epoch": 0.26581532575091016, "grad_norm": 0.7074277997016907, "learning_rate": 1.4693343456943693e-05, "loss": 3.001325225830078, "step": 32930 }, { "epoch": 0.2658960470766772, "grad_norm": 1.7519563436508179, "learning_rate": 1.4691727987205481e-05, "loss": 3.158843421936035, "step": 32940 }, { "epoch": 0.2659767684024442, "grad_norm": 0.8875615000724792, "learning_rate": 1.4690112517467267e-05, "loss": 2.841629409790039, "step": 32950 }, { "epoch": 0.2660574897282113, "grad_norm": 1.0296632051467896, "learning_rate": 1.4688497047729055e-05, "loss": 3.0100627899169923, "step": 32960 }, { "epoch": 0.26613821105397834, "grad_norm": 1.3285588026046753, "learning_rate": 1.4686881577990841e-05, "loss": 3.6676795959472654, "step": 32970 }, { "epoch": 0.2662189323797454, "grad_norm": 1.0493804216384888, "learning_rate": 1.4685266108252629e-05, "loss": 2.950979804992676, "step": 32980 }, { "epoch": 0.26629965370551245, "grad_norm": 0.8301354646682739, "learning_rate": 1.4683650638514415e-05, "loss": 3.3872127532958984, "step": 32990 }, { "epoch": 0.26638037503127954, "grad_norm": 1.3414732217788696, "learning_rate": 1.4682035168776202e-05, "loss": 3.3459678649902345, "step": 33000 }, { "epoch": 0.26646109635704657, "grad_norm": 0.6575630307197571, "learning_rate": 1.4680419699037988e-05, "loss": 2.9683460235595702, "step": 33010 }, { "epoch": 0.2665418176828136, "grad_norm": 2.5859105587005615, "learning_rate": 1.4678804229299778e-05, "loss": 3.204161834716797, "step": 33020 }, { "epoch": 0.2666225390085807, "grad_norm": 0.7695658206939697, "learning_rate": 1.4677188759561562e-05, "loss": 2.9324573516845702, "step": 33030 }, { "epoch": 0.2667032603343477, "grad_norm": 1.1634025573730469, "learning_rate": 1.4675573289823351e-05, "loss": 3.240324020385742, "step": 33040 }, { "epoch": 0.2667839816601148, "grad_norm": 0.9031772613525391, "learning_rate": 1.4673957820085136e-05, "loss": 3.0871795654296874, "step": 33050 }, { "epoch": 0.26686470298588183, "grad_norm": 1.1738654375076294, "learning_rate": 1.4672342350346925e-05, "loss": 3.1861640930175783, "step": 33060 }, { "epoch": 0.2669454243116489, "grad_norm": 1.175755500793457, "learning_rate": 1.467072688060871e-05, "loss": 3.113749122619629, "step": 33070 }, { "epoch": 0.26702614563741595, "grad_norm": 1.027342677116394, "learning_rate": 1.4669111410870499e-05, "loss": 3.1795202255249024, "step": 33080 }, { "epoch": 0.267106866963183, "grad_norm": 0.6611551642417908, "learning_rate": 1.4667495941132283e-05, "loss": 2.580234336853027, "step": 33090 }, { "epoch": 0.26718758828895006, "grad_norm": 0.9272757768630981, "learning_rate": 1.4665880471394072e-05, "loss": 3.256386947631836, "step": 33100 }, { "epoch": 0.2672683096147171, "grad_norm": 0.9686049818992615, "learning_rate": 1.4664265001655857e-05, "loss": 3.235747146606445, "step": 33110 }, { "epoch": 0.2673490309404842, "grad_norm": 1.3091962337493896, "learning_rate": 1.4662649531917646e-05, "loss": 3.258311080932617, "step": 33120 }, { "epoch": 0.2674297522662512, "grad_norm": 1.317558765411377, "learning_rate": 1.466103406217943e-05, "loss": 3.453083801269531, "step": 33130 }, { "epoch": 0.2675104735920183, "grad_norm": 0.9300373196601868, "learning_rate": 1.465941859244122e-05, "loss": 2.9347105026245117, "step": 33140 }, { "epoch": 0.2675911949177853, "grad_norm": 0.7750905156135559, "learning_rate": 1.4657803122703004e-05, "loss": 3.1743495941162108, "step": 33150 }, { "epoch": 0.2676719162435524, "grad_norm": 0.692302405834198, "learning_rate": 1.4656187652964793e-05, "loss": 3.0202873229980467, "step": 33160 }, { "epoch": 0.26775263756931944, "grad_norm": 0.668463945388794, "learning_rate": 1.4654572183226578e-05, "loss": 2.842772102355957, "step": 33170 }, { "epoch": 0.26783335889508647, "grad_norm": 1.7247079610824585, "learning_rate": 1.4652956713488367e-05, "loss": 2.806008720397949, "step": 33180 }, { "epoch": 0.26791408022085356, "grad_norm": 0.9416115880012512, "learning_rate": 1.4651341243750153e-05, "loss": 2.9327629089355467, "step": 33190 }, { "epoch": 0.2679948015466206, "grad_norm": 0.9388035535812378, "learning_rate": 1.464972577401194e-05, "loss": 2.989643859863281, "step": 33200 }, { "epoch": 0.2680755228723877, "grad_norm": 1.7962138652801514, "learning_rate": 1.4648110304273727e-05, "loss": 2.839914894104004, "step": 33210 }, { "epoch": 0.2681562441981547, "grad_norm": 0.6508623957633972, "learning_rate": 1.4646494834535515e-05, "loss": 2.932961082458496, "step": 33220 }, { "epoch": 0.2682369655239218, "grad_norm": 1.0286011695861816, "learning_rate": 1.46448793647973e-05, "loss": 2.9007938385009764, "step": 33230 }, { "epoch": 0.2683176868496888, "grad_norm": 1.1534309387207031, "learning_rate": 1.4643263895059088e-05, "loss": 3.406160354614258, "step": 33240 }, { "epoch": 0.26839840817545585, "grad_norm": 1.1099356412887573, "learning_rate": 1.4641648425320874e-05, "loss": 3.4330249786376954, "step": 33250 }, { "epoch": 0.26847912950122294, "grad_norm": 1.0911834239959717, "learning_rate": 1.4640032955582662e-05, "loss": 2.8492177963256835, "step": 33260 }, { "epoch": 0.26855985082698997, "grad_norm": 1.2593016624450684, "learning_rate": 1.4638417485844448e-05, "loss": 3.6929862976074217, "step": 33270 }, { "epoch": 0.26864057215275705, "grad_norm": 0.6934056878089905, "learning_rate": 1.4636802016106236e-05, "loss": 2.6778127670288088, "step": 33280 }, { "epoch": 0.2687212934785241, "grad_norm": 1.090631365776062, "learning_rate": 1.4635186546368022e-05, "loss": 3.606991195678711, "step": 33290 }, { "epoch": 0.26880201480429117, "grad_norm": 1.3831634521484375, "learning_rate": 1.463357107662981e-05, "loss": 3.365982437133789, "step": 33300 }, { "epoch": 0.2688827361300582, "grad_norm": 0.8445745706558228, "learning_rate": 1.4631955606891595e-05, "loss": 3.2799915313720702, "step": 33310 }, { "epoch": 0.26896345745582523, "grad_norm": 1.261597752571106, "learning_rate": 1.4630340137153383e-05, "loss": 3.6981952667236326, "step": 33320 }, { "epoch": 0.2690441787815923, "grad_norm": 1.4491156339645386, "learning_rate": 1.4628724667415169e-05, "loss": 3.1030914306640627, "step": 33330 }, { "epoch": 0.26912490010735934, "grad_norm": 0.7830358743667603, "learning_rate": 1.4627109197676957e-05, "loss": 3.150486183166504, "step": 33340 }, { "epoch": 0.26920562143312643, "grad_norm": 0.6163325309753418, "learning_rate": 1.4625493727938743e-05, "loss": 2.8002994537353514, "step": 33350 }, { "epoch": 0.26928634275889346, "grad_norm": 0.7746368646621704, "learning_rate": 1.462387825820053e-05, "loss": 2.9717021942138673, "step": 33360 }, { "epoch": 0.26936706408466055, "grad_norm": 1.251901388168335, "learning_rate": 1.4622262788462316e-05, "loss": 3.2104496002197265, "step": 33370 }, { "epoch": 0.2694477854104276, "grad_norm": 1.089675784111023, "learning_rate": 1.4620647318724104e-05, "loss": 3.6912975311279297, "step": 33380 }, { "epoch": 0.26952850673619466, "grad_norm": 0.9363368153572083, "learning_rate": 1.461903184898589e-05, "loss": 3.0122377395629885, "step": 33390 }, { "epoch": 0.2696092280619617, "grad_norm": 0.8383911848068237, "learning_rate": 1.4617416379247678e-05, "loss": 3.1530921936035154, "step": 33400 }, { "epoch": 0.2696899493877287, "grad_norm": 1.2613279819488525, "learning_rate": 1.4615800909509464e-05, "loss": 3.0563488006591797, "step": 33410 }, { "epoch": 0.2697706707134958, "grad_norm": 0.9491397738456726, "learning_rate": 1.4614185439771251e-05, "loss": 3.1698490142822267, "step": 33420 }, { "epoch": 0.26985139203926284, "grad_norm": 0.9028531908988953, "learning_rate": 1.4612569970033037e-05, "loss": 2.756581497192383, "step": 33430 }, { "epoch": 0.2699321133650299, "grad_norm": 0.706174910068512, "learning_rate": 1.4610954500294825e-05, "loss": 3.4060489654541017, "step": 33440 }, { "epoch": 0.27001283469079695, "grad_norm": 0.6584724187850952, "learning_rate": 1.4609339030556611e-05, "loss": 2.792544937133789, "step": 33450 }, { "epoch": 0.27009355601656404, "grad_norm": 1.0791380405426025, "learning_rate": 1.4607723560818399e-05, "loss": 2.8507070541381836, "step": 33460 }, { "epoch": 0.27017427734233107, "grad_norm": 0.66196608543396, "learning_rate": 1.4606108091080185e-05, "loss": 2.786189079284668, "step": 33470 }, { "epoch": 0.2702549986680981, "grad_norm": 1.0525768995285034, "learning_rate": 1.4604492621341973e-05, "loss": 3.1843549728393556, "step": 33480 }, { "epoch": 0.2703357199938652, "grad_norm": 0.8513967990875244, "learning_rate": 1.4602877151603759e-05, "loss": 3.269512939453125, "step": 33490 }, { "epoch": 0.2704164413196322, "grad_norm": 0.9605390429496765, "learning_rate": 1.4601261681865546e-05, "loss": 3.2761005401611327, "step": 33500 }, { "epoch": 0.2704971626453993, "grad_norm": 1.0859012603759766, "learning_rate": 1.4599646212127332e-05, "loss": 3.607094573974609, "step": 33510 }, { "epoch": 0.27057788397116633, "grad_norm": 1.083570122718811, "learning_rate": 1.459803074238912e-05, "loss": 3.0137327194213865, "step": 33520 }, { "epoch": 0.2706586052969334, "grad_norm": 0.9339305758476257, "learning_rate": 1.4596415272650906e-05, "loss": 3.4896183013916016, "step": 33530 }, { "epoch": 0.27073932662270045, "grad_norm": 0.6354348063468933, "learning_rate": 1.4594799802912694e-05, "loss": 3.018581581115723, "step": 33540 }, { "epoch": 0.2708200479484675, "grad_norm": 1.0564302206039429, "learning_rate": 1.459318433317448e-05, "loss": 3.27392692565918, "step": 33550 }, { "epoch": 0.27090076927423457, "grad_norm": 0.6860177516937256, "learning_rate": 1.4591568863436267e-05, "loss": 3.164430618286133, "step": 33560 }, { "epoch": 0.2709814906000016, "grad_norm": 1.1685136556625366, "learning_rate": 1.4589953393698053e-05, "loss": 3.667771911621094, "step": 33570 }, { "epoch": 0.2710622119257687, "grad_norm": 1.5438119173049927, "learning_rate": 1.4588337923959841e-05, "loss": 3.321816253662109, "step": 33580 }, { "epoch": 0.2711429332515357, "grad_norm": 1.1009312868118286, "learning_rate": 1.4586722454221627e-05, "loss": 3.0038734436035157, "step": 33590 }, { "epoch": 0.2712236545773028, "grad_norm": 0.8768975734710693, "learning_rate": 1.4585106984483415e-05, "loss": 2.95401611328125, "step": 33600 }, { "epoch": 0.2713043759030698, "grad_norm": 1.3353978395462036, "learning_rate": 1.45834915147452e-05, "loss": 2.704306983947754, "step": 33610 }, { "epoch": 0.2713850972288369, "grad_norm": 1.2915922403335571, "learning_rate": 1.4581876045006988e-05, "loss": 3.045139503479004, "step": 33620 }, { "epoch": 0.27146581855460394, "grad_norm": 1.110945463180542, "learning_rate": 1.4580260575268774e-05, "loss": 3.1043184280395506, "step": 33630 }, { "epoch": 0.271546539880371, "grad_norm": 0.7099101543426514, "learning_rate": 1.4578645105530562e-05, "loss": 3.394868087768555, "step": 33640 }, { "epoch": 0.27162726120613806, "grad_norm": 1.3017743825912476, "learning_rate": 1.4577029635792348e-05, "loss": 2.9133249282836915, "step": 33650 }, { "epoch": 0.2717079825319051, "grad_norm": 0.8164697289466858, "learning_rate": 1.4575414166054136e-05, "loss": 3.2004085540771485, "step": 33660 }, { "epoch": 0.2717887038576722, "grad_norm": 0.7445421814918518, "learning_rate": 1.4573798696315922e-05, "loss": 3.0087005615234377, "step": 33670 }, { "epoch": 0.2718694251834392, "grad_norm": 0.837309718132019, "learning_rate": 1.457218322657771e-05, "loss": 2.9125225067138674, "step": 33680 }, { "epoch": 0.2719501465092063, "grad_norm": 1.3421190977096558, "learning_rate": 1.4570567756839495e-05, "loss": 3.1272254943847657, "step": 33690 }, { "epoch": 0.2720308678349733, "grad_norm": 1.1043198108673096, "learning_rate": 1.4568952287101283e-05, "loss": 3.0920745849609377, "step": 33700 }, { "epoch": 0.27211158916074035, "grad_norm": 1.158064365386963, "learning_rate": 1.4567336817363069e-05, "loss": 2.999062919616699, "step": 33710 }, { "epoch": 0.27219231048650744, "grad_norm": 0.752718985080719, "learning_rate": 1.4565721347624857e-05, "loss": 2.9702665328979494, "step": 33720 }, { "epoch": 0.27227303181227447, "grad_norm": 2.698854684829712, "learning_rate": 1.4564105877886643e-05, "loss": 3.45650634765625, "step": 33730 }, { "epoch": 0.27235375313804155, "grad_norm": 0.5226702690124512, "learning_rate": 1.456249040814843e-05, "loss": 2.6448850631713867, "step": 33740 }, { "epoch": 0.2724344744638086, "grad_norm": 0.9187614321708679, "learning_rate": 1.4560874938410217e-05, "loss": 2.735503387451172, "step": 33750 }, { "epoch": 0.27251519578957567, "grad_norm": 0.8881615400314331, "learning_rate": 1.4559259468672004e-05, "loss": 2.8495403289794923, "step": 33760 }, { "epoch": 0.2725959171153427, "grad_norm": 0.9288269281387329, "learning_rate": 1.455764399893379e-05, "loss": 3.3410877227783202, "step": 33770 }, { "epoch": 0.27267663844110973, "grad_norm": 1.1272443532943726, "learning_rate": 1.4556028529195578e-05, "loss": 3.0111034393310545, "step": 33780 }, { "epoch": 0.2727573597668768, "grad_norm": 1.4218275547027588, "learning_rate": 1.4554413059457364e-05, "loss": 3.0572113037109374, "step": 33790 }, { "epoch": 0.27283808109264385, "grad_norm": 1.065224289894104, "learning_rate": 1.4552797589719152e-05, "loss": 2.848491096496582, "step": 33800 }, { "epoch": 0.27291880241841093, "grad_norm": 1.3938039541244507, "learning_rate": 1.4551182119980938e-05, "loss": 3.838572311401367, "step": 33810 }, { "epoch": 0.27299952374417796, "grad_norm": 0.9653141498565674, "learning_rate": 1.4549566650242725e-05, "loss": 3.0718414306640627, "step": 33820 }, { "epoch": 0.27308024506994505, "grad_norm": 0.8950255513191223, "learning_rate": 1.4547951180504511e-05, "loss": 3.330341339111328, "step": 33830 }, { "epoch": 0.2731609663957121, "grad_norm": 1.226499319076538, "learning_rate": 1.4546335710766299e-05, "loss": 2.8706029891967773, "step": 33840 }, { "epoch": 0.27324168772147917, "grad_norm": 0.9760552644729614, "learning_rate": 1.4544720241028085e-05, "loss": 3.529132080078125, "step": 33850 }, { "epoch": 0.2733224090472462, "grad_norm": 1.1793454885482788, "learning_rate": 1.4543104771289873e-05, "loss": 3.18043212890625, "step": 33860 }, { "epoch": 0.2734031303730132, "grad_norm": 0.9775580763816833, "learning_rate": 1.4541489301551659e-05, "loss": 3.7040210723876954, "step": 33870 }, { "epoch": 0.2734838516987803, "grad_norm": 1.7142986059188843, "learning_rate": 1.4539873831813446e-05, "loss": 3.214267349243164, "step": 33880 }, { "epoch": 0.27356457302454734, "grad_norm": 0.8382578492164612, "learning_rate": 1.4538258362075232e-05, "loss": 3.0128307342529297, "step": 33890 }, { "epoch": 0.2736452943503144, "grad_norm": 1.2678098678588867, "learning_rate": 1.453664289233702e-05, "loss": 3.138922119140625, "step": 33900 }, { "epoch": 0.27372601567608146, "grad_norm": 0.9144306182861328, "learning_rate": 1.4535027422598806e-05, "loss": 3.0727975845336912, "step": 33910 }, { "epoch": 0.27380673700184854, "grad_norm": 0.6235559582710266, "learning_rate": 1.4533411952860594e-05, "loss": 3.1092119216918945, "step": 33920 }, { "epoch": 0.2738874583276156, "grad_norm": 1.2133489847183228, "learning_rate": 1.453179648312238e-05, "loss": 2.6995965957641603, "step": 33930 }, { "epoch": 0.2739681796533826, "grad_norm": 1.0394723415374756, "learning_rate": 1.4530181013384167e-05, "loss": 3.4366081237792967, "step": 33940 }, { "epoch": 0.2740489009791497, "grad_norm": 1.2754515409469604, "learning_rate": 1.4528565543645953e-05, "loss": 3.38670654296875, "step": 33950 }, { "epoch": 0.2741296223049167, "grad_norm": 1.3912960290908813, "learning_rate": 1.4526950073907741e-05, "loss": 3.500501251220703, "step": 33960 }, { "epoch": 0.2742103436306838, "grad_norm": 1.2731388807296753, "learning_rate": 1.4525334604169527e-05, "loss": 2.982891082763672, "step": 33970 }, { "epoch": 0.27429106495645084, "grad_norm": 0.9280473589897156, "learning_rate": 1.4523719134431315e-05, "loss": 2.675425910949707, "step": 33980 }, { "epoch": 0.2743717862822179, "grad_norm": 1.10303795337677, "learning_rate": 1.45221036646931e-05, "loss": 2.9866863250732423, "step": 33990 }, { "epoch": 0.27445250760798495, "grad_norm": 0.9759531617164612, "learning_rate": 1.4520488194954888e-05, "loss": 2.766561508178711, "step": 34000 }, { "epoch": 0.274533228933752, "grad_norm": 0.7101432085037231, "learning_rate": 1.4518872725216674e-05, "loss": 3.1392963409423826, "step": 34010 }, { "epoch": 0.27461395025951907, "grad_norm": 1.0462074279785156, "learning_rate": 1.4517257255478462e-05, "loss": 2.7684030532836914, "step": 34020 }, { "epoch": 0.2746946715852861, "grad_norm": 0.9644880890846252, "learning_rate": 1.4515641785740248e-05, "loss": 3.069379997253418, "step": 34030 }, { "epoch": 0.2747753929110532, "grad_norm": 0.8247462511062622, "learning_rate": 1.4514026316002036e-05, "loss": 3.0074960708618166, "step": 34040 }, { "epoch": 0.2748561142368202, "grad_norm": 1.1110472679138184, "learning_rate": 1.4512410846263822e-05, "loss": 3.075351524353027, "step": 34050 }, { "epoch": 0.2749368355625873, "grad_norm": 1.7665358781814575, "learning_rate": 1.4510795376525611e-05, "loss": 2.968762016296387, "step": 34060 }, { "epoch": 0.27501755688835433, "grad_norm": 0.9006401896476746, "learning_rate": 1.4509179906787396e-05, "loss": 3.069222259521484, "step": 34070 }, { "epoch": 0.27509827821412136, "grad_norm": 0.65992271900177, "learning_rate": 1.4507564437049185e-05, "loss": 3.0004886627197265, "step": 34080 }, { "epoch": 0.27517899953988845, "grad_norm": 1.0536025762557983, "learning_rate": 1.4505948967310973e-05, "loss": 3.3089859008789064, "step": 34090 }, { "epoch": 0.2752597208656555, "grad_norm": 0.8452950716018677, "learning_rate": 1.4504333497572759e-05, "loss": 3.233297348022461, "step": 34100 }, { "epoch": 0.27534044219142256, "grad_norm": 1.029802918434143, "learning_rate": 1.4502718027834546e-05, "loss": 3.596366119384766, "step": 34110 }, { "epoch": 0.2754211635171896, "grad_norm": 0.8293606638908386, "learning_rate": 1.4501102558096332e-05, "loss": 2.8698902130126953, "step": 34120 }, { "epoch": 0.2755018848429567, "grad_norm": 0.9616727232933044, "learning_rate": 1.449948708835812e-05, "loss": 3.0391260147094727, "step": 34130 }, { "epoch": 0.2755826061687237, "grad_norm": 1.1518819332122803, "learning_rate": 1.4497871618619906e-05, "loss": 3.103318786621094, "step": 34140 }, { "epoch": 0.2756633274944908, "grad_norm": 0.8939857482910156, "learning_rate": 1.4496256148881694e-05, "loss": 2.916057586669922, "step": 34150 }, { "epoch": 0.2757440488202578, "grad_norm": 1.0519282817840576, "learning_rate": 1.449464067914348e-05, "loss": 2.8672204971313477, "step": 34160 }, { "epoch": 0.27582477014602486, "grad_norm": 0.7459799647331238, "learning_rate": 1.4493025209405267e-05, "loss": 3.297083282470703, "step": 34170 }, { "epoch": 0.27590549147179194, "grad_norm": 0.613688588142395, "learning_rate": 1.4491409739667053e-05, "loss": 2.9058914184570312, "step": 34180 }, { "epoch": 0.27598621279755897, "grad_norm": 2.221433162689209, "learning_rate": 1.4489794269928841e-05, "loss": 3.123293876647949, "step": 34190 }, { "epoch": 0.27606693412332606, "grad_norm": 1.1224631071090698, "learning_rate": 1.4488178800190627e-05, "loss": 3.088726043701172, "step": 34200 }, { "epoch": 0.2761476554490931, "grad_norm": 1.0700623989105225, "learning_rate": 1.4486563330452415e-05, "loss": 2.7381683349609376, "step": 34210 }, { "epoch": 0.2762283767748602, "grad_norm": 1.0006861686706543, "learning_rate": 1.44849478607142e-05, "loss": 3.0081449508666993, "step": 34220 }, { "epoch": 0.2763090981006272, "grad_norm": 0.7171394228935242, "learning_rate": 1.4483332390975988e-05, "loss": 2.7052902221679687, "step": 34230 }, { "epoch": 0.27638981942639423, "grad_norm": 0.8150771260261536, "learning_rate": 1.4481716921237774e-05, "loss": 2.9264495849609373, "step": 34240 }, { "epoch": 0.2764705407521613, "grad_norm": 0.8691646456718445, "learning_rate": 1.4480101451499562e-05, "loss": 3.0903491973876953, "step": 34250 }, { "epoch": 0.27655126207792835, "grad_norm": 1.0442912578582764, "learning_rate": 1.4478485981761348e-05, "loss": 2.8794248580932615, "step": 34260 }, { "epoch": 0.27663198340369544, "grad_norm": 0.7251247763633728, "learning_rate": 1.4476870512023136e-05, "loss": 3.550643539428711, "step": 34270 }, { "epoch": 0.27671270472946247, "grad_norm": 1.0092881917953491, "learning_rate": 1.4475255042284922e-05, "loss": 3.2484451293945313, "step": 34280 }, { "epoch": 0.27679342605522955, "grad_norm": 0.601344108581543, "learning_rate": 1.447363957254671e-05, "loss": 2.7372062683105467, "step": 34290 }, { "epoch": 0.2768741473809966, "grad_norm": 1.0290247201919556, "learning_rate": 1.4472024102808496e-05, "loss": 2.964112663269043, "step": 34300 }, { "epoch": 0.2769548687067636, "grad_norm": 1.3007965087890625, "learning_rate": 1.4470408633070283e-05, "loss": 3.2842411041259765, "step": 34310 }, { "epoch": 0.2770355900325307, "grad_norm": 2.9878876209259033, "learning_rate": 1.446879316333207e-05, "loss": 3.506488037109375, "step": 34320 }, { "epoch": 0.27711631135829773, "grad_norm": 0.7875100374221802, "learning_rate": 1.4467177693593857e-05, "loss": 3.10501651763916, "step": 34330 }, { "epoch": 0.2771970326840648, "grad_norm": 0.7555063962936401, "learning_rate": 1.4465562223855643e-05, "loss": 2.7239755630493163, "step": 34340 }, { "epoch": 0.27727775400983184, "grad_norm": 1.1582322120666504, "learning_rate": 1.446394675411743e-05, "loss": 2.8723506927490234, "step": 34350 }, { "epoch": 0.27735847533559893, "grad_norm": 0.8354750275611877, "learning_rate": 1.4462331284379217e-05, "loss": 2.95153751373291, "step": 34360 }, { "epoch": 0.27743919666136596, "grad_norm": 1.486870288848877, "learning_rate": 1.4460715814641004e-05, "loss": 2.821464729309082, "step": 34370 }, { "epoch": 0.27751991798713305, "grad_norm": 1.3865889310836792, "learning_rate": 1.445910034490279e-05, "loss": 3.257933807373047, "step": 34380 }, { "epoch": 0.2776006393129001, "grad_norm": 1.2340840101242065, "learning_rate": 1.4457484875164578e-05, "loss": 3.4661796569824217, "step": 34390 }, { "epoch": 0.2776813606386671, "grad_norm": 0.5618912577629089, "learning_rate": 1.4455869405426364e-05, "loss": 2.8701828002929686, "step": 34400 }, { "epoch": 0.2777620819644342, "grad_norm": 0.5949578881263733, "learning_rate": 1.4454253935688152e-05, "loss": 2.740652847290039, "step": 34410 }, { "epoch": 0.2778428032902012, "grad_norm": 0.6750642657279968, "learning_rate": 1.4452638465949938e-05, "loss": 3.104547691345215, "step": 34420 }, { "epoch": 0.2779235246159683, "grad_norm": 1.6414434909820557, "learning_rate": 1.4451022996211725e-05, "loss": 2.914539909362793, "step": 34430 }, { "epoch": 0.27800424594173534, "grad_norm": 0.8793669939041138, "learning_rate": 1.4449407526473511e-05, "loss": 3.9294509887695312, "step": 34440 }, { "epoch": 0.2780849672675024, "grad_norm": 0.9226410388946533, "learning_rate": 1.4447792056735299e-05, "loss": 2.888245391845703, "step": 34450 }, { "epoch": 0.27816568859326946, "grad_norm": 0.6428408026695251, "learning_rate": 1.4446176586997085e-05, "loss": 3.2008296966552736, "step": 34460 }, { "epoch": 0.2782464099190365, "grad_norm": 0.8281367421150208, "learning_rate": 1.4444561117258873e-05, "loss": 3.3448410034179688, "step": 34470 }, { "epoch": 0.27832713124480357, "grad_norm": 0.8533666133880615, "learning_rate": 1.4442945647520659e-05, "loss": 3.184978485107422, "step": 34480 }, { "epoch": 0.2784078525705706, "grad_norm": 1.099441647529602, "learning_rate": 1.4441330177782446e-05, "loss": 3.3693702697753904, "step": 34490 }, { "epoch": 0.2784885738963377, "grad_norm": 0.9352798461914062, "learning_rate": 1.4439714708044232e-05, "loss": 3.230387496948242, "step": 34500 }, { "epoch": 0.2785692952221047, "grad_norm": 1.2853031158447266, "learning_rate": 1.443809923830602e-05, "loss": 2.8621763229370116, "step": 34510 }, { "epoch": 0.2786500165478718, "grad_norm": 0.5773811936378479, "learning_rate": 1.4436483768567806e-05, "loss": 2.552690315246582, "step": 34520 }, { "epoch": 0.27873073787363883, "grad_norm": 0.8155794739723206, "learning_rate": 1.4434868298829594e-05, "loss": 3.332936096191406, "step": 34530 }, { "epoch": 0.27881145919940586, "grad_norm": 5.072231769561768, "learning_rate": 1.443325282909138e-05, "loss": 3.206827926635742, "step": 34540 }, { "epoch": 0.27889218052517295, "grad_norm": 0.7355083227157593, "learning_rate": 1.4431637359353168e-05, "loss": 3.2028457641601564, "step": 34550 }, { "epoch": 0.27897290185094, "grad_norm": 1.0115960836410522, "learning_rate": 1.4430021889614954e-05, "loss": 2.896937942504883, "step": 34560 }, { "epoch": 0.27905362317670707, "grad_norm": 1.0732566118240356, "learning_rate": 1.4428406419876741e-05, "loss": 2.999220085144043, "step": 34570 }, { "epoch": 0.2791343445024741, "grad_norm": 0.9800414443016052, "learning_rate": 1.4426790950138527e-05, "loss": 2.923992156982422, "step": 34580 }, { "epoch": 0.2792150658282412, "grad_norm": 2.2030906677246094, "learning_rate": 1.4425175480400315e-05, "loss": 3.0536739349365236, "step": 34590 }, { "epoch": 0.2792957871540082, "grad_norm": 1.0080803632736206, "learning_rate": 1.4423560010662101e-05, "loss": 2.8780126571655273, "step": 34600 }, { "epoch": 0.2793765084797753, "grad_norm": 0.8126186728477478, "learning_rate": 1.4421944540923889e-05, "loss": 3.214009094238281, "step": 34610 }, { "epoch": 0.27945722980554233, "grad_norm": 1.0632363557815552, "learning_rate": 1.4420329071185675e-05, "loss": 2.645926856994629, "step": 34620 }, { "epoch": 0.27953795113130936, "grad_norm": 0.9397959113121033, "learning_rate": 1.4418713601447462e-05, "loss": 3.054620361328125, "step": 34630 }, { "epoch": 0.27961867245707644, "grad_norm": 1.5131170749664307, "learning_rate": 1.4417098131709248e-05, "loss": 3.4440502166748046, "step": 34640 }, { "epoch": 0.2796993937828435, "grad_norm": 1.2697027921676636, "learning_rate": 1.4415482661971036e-05, "loss": 3.6351268768310545, "step": 34650 }, { "epoch": 0.27978011510861056, "grad_norm": 1.2599010467529297, "learning_rate": 1.4413867192232822e-05, "loss": 2.8674659729003906, "step": 34660 }, { "epoch": 0.2798608364343776, "grad_norm": 1.5731253623962402, "learning_rate": 1.441225172249461e-05, "loss": 3.296830749511719, "step": 34670 }, { "epoch": 0.2799415577601447, "grad_norm": 1.3410966396331787, "learning_rate": 1.4410636252756396e-05, "loss": 3.1227067947387694, "step": 34680 }, { "epoch": 0.2800222790859117, "grad_norm": 0.929814875125885, "learning_rate": 1.4409020783018183e-05, "loss": 3.343854522705078, "step": 34690 }, { "epoch": 0.28010300041167874, "grad_norm": 0.7942084074020386, "learning_rate": 1.440740531327997e-05, "loss": 3.3634078979492186, "step": 34700 }, { "epoch": 0.2801837217374458, "grad_norm": 0.9839335083961487, "learning_rate": 1.4405789843541757e-05, "loss": 3.3490215301513673, "step": 34710 }, { "epoch": 0.28026444306321285, "grad_norm": 1.4160301685333252, "learning_rate": 1.4404174373803543e-05, "loss": 2.887060546875, "step": 34720 }, { "epoch": 0.28034516438897994, "grad_norm": 1.0892279148101807, "learning_rate": 1.440255890406533e-05, "loss": 2.9834129333496096, "step": 34730 }, { "epoch": 0.28042588571474697, "grad_norm": 1.0173660516738892, "learning_rate": 1.4400943434327117e-05, "loss": 3.163957214355469, "step": 34740 }, { "epoch": 0.28050660704051406, "grad_norm": 1.4578152894973755, "learning_rate": 1.4399327964588904e-05, "loss": 3.1446762084960938, "step": 34750 }, { "epoch": 0.2805873283662811, "grad_norm": 1.0629899501800537, "learning_rate": 1.439771249485069e-05, "loss": 3.400893783569336, "step": 34760 }, { "epoch": 0.2806680496920481, "grad_norm": 1.0946506261825562, "learning_rate": 1.4396097025112478e-05, "loss": 3.0035942077636717, "step": 34770 }, { "epoch": 0.2807487710178152, "grad_norm": 0.6818959712982178, "learning_rate": 1.4394481555374264e-05, "loss": 3.1108078002929687, "step": 34780 }, { "epoch": 0.28082949234358223, "grad_norm": 0.8575376868247986, "learning_rate": 1.4392866085636052e-05, "loss": 2.961878204345703, "step": 34790 }, { "epoch": 0.2809102136693493, "grad_norm": 1.2608622312545776, "learning_rate": 1.4391250615897838e-05, "loss": 2.760202980041504, "step": 34800 }, { "epoch": 0.28099093499511635, "grad_norm": 0.9500030875205994, "learning_rate": 1.4389635146159626e-05, "loss": 3.2779273986816406, "step": 34810 }, { "epoch": 0.28107165632088343, "grad_norm": 1.2881910800933838, "learning_rate": 1.4388019676421412e-05, "loss": 3.785016632080078, "step": 34820 }, { "epoch": 0.28115237764665046, "grad_norm": 1.0976639986038208, "learning_rate": 1.43864042066832e-05, "loss": 3.2680484771728517, "step": 34830 }, { "epoch": 0.28123309897241755, "grad_norm": 0.751026451587677, "learning_rate": 1.4384788736944985e-05, "loss": 3.3456035614013673, "step": 34840 }, { "epoch": 0.2813138202981846, "grad_norm": 1.420547366142273, "learning_rate": 1.4383173267206773e-05, "loss": 3.334029769897461, "step": 34850 }, { "epoch": 0.2813945416239516, "grad_norm": 1.2920000553131104, "learning_rate": 1.4381557797468559e-05, "loss": 3.2056659698486327, "step": 34860 }, { "epoch": 0.2814752629497187, "grad_norm": 0.5876786112785339, "learning_rate": 1.4379942327730347e-05, "loss": 2.9586551666259764, "step": 34870 }, { "epoch": 0.2815559842754857, "grad_norm": 0.938325047492981, "learning_rate": 1.4378326857992133e-05, "loss": 2.9765161514282226, "step": 34880 }, { "epoch": 0.2816367056012528, "grad_norm": 1.159692645072937, "learning_rate": 1.437671138825392e-05, "loss": 2.866716194152832, "step": 34890 }, { "epoch": 0.28171742692701984, "grad_norm": 1.0532395839691162, "learning_rate": 1.4375095918515706e-05, "loss": 2.613214111328125, "step": 34900 }, { "epoch": 0.28179814825278693, "grad_norm": 1.0665079355239868, "learning_rate": 1.4373480448777494e-05, "loss": 2.993980598449707, "step": 34910 }, { "epoch": 0.28187886957855396, "grad_norm": 0.9859315752983093, "learning_rate": 1.437186497903928e-05, "loss": 2.9471036911010744, "step": 34920 }, { "epoch": 0.281959590904321, "grad_norm": 0.992637038230896, "learning_rate": 1.437024950930107e-05, "loss": 3.201905059814453, "step": 34930 }, { "epoch": 0.2820403122300881, "grad_norm": 0.9469434022903442, "learning_rate": 1.4368634039562854e-05, "loss": 2.9874713897705076, "step": 34940 }, { "epoch": 0.2821210335558551, "grad_norm": 0.8887361884117126, "learning_rate": 1.4367018569824643e-05, "loss": 2.9256624221801757, "step": 34950 }, { "epoch": 0.2822017548816222, "grad_norm": 0.8724387288093567, "learning_rate": 1.4365403100086427e-05, "loss": 3.069154167175293, "step": 34960 }, { "epoch": 0.2822824762073892, "grad_norm": 1.6888898611068726, "learning_rate": 1.4363787630348217e-05, "loss": 2.9790485382080076, "step": 34970 }, { "epoch": 0.2823631975331563, "grad_norm": 0.9945788979530334, "learning_rate": 1.4362172160610001e-05, "loss": 3.366192626953125, "step": 34980 }, { "epoch": 0.28244391885892334, "grad_norm": 0.9233502745628357, "learning_rate": 1.436055669087179e-05, "loss": 3.2850643157958985, "step": 34990 }, { "epoch": 0.28252464018469037, "grad_norm": 0.8591418266296387, "learning_rate": 1.4358941221133575e-05, "loss": 3.16189022064209, "step": 35000 }, { "epoch": 0.28260536151045745, "grad_norm": 1.0312259197235107, "learning_rate": 1.4357325751395364e-05, "loss": 2.8323553085327147, "step": 35010 }, { "epoch": 0.2826860828362245, "grad_norm": 1.1786532402038574, "learning_rate": 1.4355710281657148e-05, "loss": 3.146417999267578, "step": 35020 }, { "epoch": 0.28276680416199157, "grad_norm": 0.5924095511436462, "learning_rate": 1.4354094811918938e-05, "loss": 3.4026512145996093, "step": 35030 }, { "epoch": 0.2828475254877586, "grad_norm": 1.175093173980713, "learning_rate": 1.4352479342180722e-05, "loss": 2.752153015136719, "step": 35040 }, { "epoch": 0.2829282468135257, "grad_norm": 1.071495771408081, "learning_rate": 1.4350863872442512e-05, "loss": 2.9628322601318358, "step": 35050 }, { "epoch": 0.2830089681392927, "grad_norm": 0.8439601063728333, "learning_rate": 1.4349248402704296e-05, "loss": 2.754471206665039, "step": 35060 }, { "epoch": 0.2830896894650598, "grad_norm": 0.673981785774231, "learning_rate": 1.4347632932966085e-05, "loss": 3.4360321044921873, "step": 35070 }, { "epoch": 0.28317041079082683, "grad_norm": 0.7123093605041504, "learning_rate": 1.434601746322787e-05, "loss": 2.8667150497436524, "step": 35080 }, { "epoch": 0.28325113211659386, "grad_norm": 1.358691930770874, "learning_rate": 1.4344401993489659e-05, "loss": 3.244794464111328, "step": 35090 }, { "epoch": 0.28333185344236095, "grad_norm": 1.2490490674972534, "learning_rate": 1.4342786523751445e-05, "loss": 3.0326618194580077, "step": 35100 }, { "epoch": 0.283412574768128, "grad_norm": 0.7001484036445618, "learning_rate": 1.4341171054013233e-05, "loss": 3.1127525329589845, "step": 35110 }, { "epoch": 0.28349329609389506, "grad_norm": 0.616722822189331, "learning_rate": 1.4339555584275019e-05, "loss": 3.3258018493652344, "step": 35120 }, { "epoch": 0.2835740174196621, "grad_norm": 1.103458046913147, "learning_rate": 1.4337940114536806e-05, "loss": 3.206351470947266, "step": 35130 }, { "epoch": 0.2836547387454292, "grad_norm": 1.045082926750183, "learning_rate": 1.4336324644798592e-05, "loss": 3.074252128601074, "step": 35140 }, { "epoch": 0.2837354600711962, "grad_norm": 0.7939634323120117, "learning_rate": 1.433470917506038e-05, "loss": 2.7353483200073243, "step": 35150 }, { "epoch": 0.28381618139696324, "grad_norm": 1.1400234699249268, "learning_rate": 1.4333093705322166e-05, "loss": 2.8703441619873047, "step": 35160 }, { "epoch": 0.2838969027227303, "grad_norm": 0.7787342667579651, "learning_rate": 1.4331478235583954e-05, "loss": 2.873427963256836, "step": 35170 }, { "epoch": 0.28397762404849736, "grad_norm": 1.0550535917282104, "learning_rate": 1.432986276584574e-05, "loss": 3.5482540130615234, "step": 35180 }, { "epoch": 0.28405834537426444, "grad_norm": 1.044517159461975, "learning_rate": 1.4328247296107527e-05, "loss": 3.5084468841552736, "step": 35190 }, { "epoch": 0.2841390667000315, "grad_norm": 2.3875458240509033, "learning_rate": 1.4326631826369313e-05, "loss": 3.5330272674560548, "step": 35200 }, { "epoch": 0.28421978802579856, "grad_norm": 0.7710232138633728, "learning_rate": 1.4325016356631101e-05, "loss": 2.8041255950927733, "step": 35210 }, { "epoch": 0.2843005093515656, "grad_norm": 0.7939587235450745, "learning_rate": 1.4323400886892887e-05, "loss": 3.2888660430908203, "step": 35220 }, { "epoch": 0.2843812306773326, "grad_norm": 1.7782809734344482, "learning_rate": 1.4321785417154675e-05, "loss": 2.9706350326538087, "step": 35230 }, { "epoch": 0.2844619520030997, "grad_norm": 1.3048279285430908, "learning_rate": 1.432016994741646e-05, "loss": 2.8264135360717773, "step": 35240 }, { "epoch": 0.28454267332886674, "grad_norm": 0.7989057898521423, "learning_rate": 1.4318554477678248e-05, "loss": 3.1384050369262697, "step": 35250 }, { "epoch": 0.2846233946546338, "grad_norm": 1.2311800718307495, "learning_rate": 1.4316939007940034e-05, "loss": 3.058425712585449, "step": 35260 }, { "epoch": 0.28470411598040085, "grad_norm": 1.232054352760315, "learning_rate": 1.4315323538201822e-05, "loss": 2.8903409957885744, "step": 35270 }, { "epoch": 0.28478483730616794, "grad_norm": 1.2255079746246338, "learning_rate": 1.4313708068463608e-05, "loss": 2.8312314987182616, "step": 35280 }, { "epoch": 0.28486555863193497, "grad_norm": 0.6238853931427002, "learning_rate": 1.4312092598725396e-05, "loss": 3.10388069152832, "step": 35290 }, { "epoch": 0.284946279957702, "grad_norm": 1.2941559553146362, "learning_rate": 1.4310477128987182e-05, "loss": 3.1999101638793945, "step": 35300 }, { "epoch": 0.2850270012834691, "grad_norm": 1.0623295307159424, "learning_rate": 1.430886165924897e-05, "loss": 2.7209497451782227, "step": 35310 }, { "epoch": 0.2851077226092361, "grad_norm": 0.8949081897735596, "learning_rate": 1.4307246189510756e-05, "loss": 2.8347665786743166, "step": 35320 }, { "epoch": 0.2851884439350032, "grad_norm": 0.5168116688728333, "learning_rate": 1.4305630719772543e-05, "loss": 3.0493988037109374, "step": 35330 }, { "epoch": 0.28526916526077023, "grad_norm": 1.0746681690216064, "learning_rate": 1.4304015250034331e-05, "loss": 3.1625045776367187, "step": 35340 }, { "epoch": 0.2853498865865373, "grad_norm": 2.0336203575134277, "learning_rate": 1.4302399780296117e-05, "loss": 3.409267807006836, "step": 35350 }, { "epoch": 0.28543060791230435, "grad_norm": 0.9620830416679382, "learning_rate": 1.4300784310557905e-05, "loss": 3.123200798034668, "step": 35360 }, { "epoch": 0.28551132923807143, "grad_norm": 1.1469749212265015, "learning_rate": 1.429916884081969e-05, "loss": 2.839877700805664, "step": 35370 }, { "epoch": 0.28559205056383846, "grad_norm": 0.965155839920044, "learning_rate": 1.4297553371081478e-05, "loss": 3.2427528381347654, "step": 35380 }, { "epoch": 0.2856727718896055, "grad_norm": 0.8271947503089905, "learning_rate": 1.4295937901343264e-05, "loss": 2.7426342010498046, "step": 35390 }, { "epoch": 0.2857534932153726, "grad_norm": 0.6416189670562744, "learning_rate": 1.4294322431605052e-05, "loss": 2.718648147583008, "step": 35400 }, { "epoch": 0.2858342145411396, "grad_norm": 0.9093154668807983, "learning_rate": 1.4292706961866838e-05, "loss": 3.0153631210327148, "step": 35410 }, { "epoch": 0.2859149358669067, "grad_norm": 0.9384422898292542, "learning_rate": 1.4291091492128626e-05, "loss": 2.9881664276123048, "step": 35420 }, { "epoch": 0.2859956571926737, "grad_norm": 0.5062437653541565, "learning_rate": 1.4289476022390412e-05, "loss": 2.996298980712891, "step": 35430 }, { "epoch": 0.2860763785184408, "grad_norm": 0.9151896238327026, "learning_rate": 1.42878605526522e-05, "loss": 2.869973373413086, "step": 35440 }, { "epoch": 0.28615709984420784, "grad_norm": 1.2284597158432007, "learning_rate": 1.4286245082913985e-05, "loss": 3.103036880493164, "step": 35450 }, { "epoch": 0.28623782116997487, "grad_norm": 0.6174798011779785, "learning_rate": 1.4284629613175773e-05, "loss": 3.043814468383789, "step": 35460 }, { "epoch": 0.28631854249574196, "grad_norm": 0.6710684895515442, "learning_rate": 1.4283014143437559e-05, "loss": 3.105416679382324, "step": 35470 }, { "epoch": 0.286399263821509, "grad_norm": 0.9266555309295654, "learning_rate": 1.4281398673699347e-05, "loss": 3.2679920196533203, "step": 35480 }, { "epoch": 0.2864799851472761, "grad_norm": 1.0829178094863892, "learning_rate": 1.4279783203961133e-05, "loss": 3.2499488830566405, "step": 35490 }, { "epoch": 0.2865607064730431, "grad_norm": 2.6925253868103027, "learning_rate": 1.427816773422292e-05, "loss": 3.7456932067871094, "step": 35500 }, { "epoch": 0.2866414277988102, "grad_norm": 0.7814476490020752, "learning_rate": 1.4276552264484706e-05, "loss": 3.1560009002685545, "step": 35510 }, { "epoch": 0.2867221491245772, "grad_norm": 1.2791670560836792, "learning_rate": 1.4274936794746494e-05, "loss": 3.0909774780273436, "step": 35520 }, { "epoch": 0.28680287045034425, "grad_norm": 0.9332887530326843, "learning_rate": 1.427332132500828e-05, "loss": 2.9439563751220703, "step": 35530 }, { "epoch": 0.28688359177611134, "grad_norm": 0.9813966751098633, "learning_rate": 1.4271705855270068e-05, "loss": 3.042230987548828, "step": 35540 }, { "epoch": 0.28696431310187837, "grad_norm": 0.8191531896591187, "learning_rate": 1.4270090385531854e-05, "loss": 2.8612644195556642, "step": 35550 }, { "epoch": 0.28704503442764545, "grad_norm": 0.9395809173583984, "learning_rate": 1.4268474915793642e-05, "loss": 3.171925354003906, "step": 35560 }, { "epoch": 0.2871257557534125, "grad_norm": 1.620546579360962, "learning_rate": 1.4266859446055428e-05, "loss": 3.174528884887695, "step": 35570 }, { "epoch": 0.28720647707917957, "grad_norm": 0.9689822196960449, "learning_rate": 1.4265243976317215e-05, "loss": 3.1982044219970702, "step": 35580 }, { "epoch": 0.2872871984049466, "grad_norm": 0.8392884731292725, "learning_rate": 1.4263628506579001e-05, "loss": 3.4767581939697267, "step": 35590 }, { "epoch": 0.2873679197307137, "grad_norm": 1.5216443538665771, "learning_rate": 1.4262013036840789e-05, "loss": 3.664622497558594, "step": 35600 }, { "epoch": 0.2874486410564807, "grad_norm": 1.2259162664413452, "learning_rate": 1.4260397567102575e-05, "loss": 2.8024469375610352, "step": 35610 }, { "epoch": 0.28752936238224774, "grad_norm": 1.1469972133636475, "learning_rate": 1.4258782097364363e-05, "loss": 3.0765209197998047, "step": 35620 }, { "epoch": 0.28761008370801483, "grad_norm": 1.566759705543518, "learning_rate": 1.4257166627626149e-05, "loss": 2.9325336456298827, "step": 35630 }, { "epoch": 0.28769080503378186, "grad_norm": 1.2204363346099854, "learning_rate": 1.4255551157887936e-05, "loss": 2.9591245651245117, "step": 35640 }, { "epoch": 0.28777152635954895, "grad_norm": 0.5821758508682251, "learning_rate": 1.4253935688149722e-05, "loss": 3.197308158874512, "step": 35650 }, { "epoch": 0.287852247685316, "grad_norm": 1.2036651372909546, "learning_rate": 1.425232021841151e-05, "loss": 2.824081230163574, "step": 35660 }, { "epoch": 0.28793296901108306, "grad_norm": 0.7997781038284302, "learning_rate": 1.4250704748673296e-05, "loss": 3.018899154663086, "step": 35670 }, { "epoch": 0.2880136903368501, "grad_norm": 0.9095338582992554, "learning_rate": 1.4249089278935084e-05, "loss": 3.5762184143066404, "step": 35680 }, { "epoch": 0.2880944116626171, "grad_norm": 1.2421072721481323, "learning_rate": 1.424747380919687e-05, "loss": 2.8491703033447267, "step": 35690 }, { "epoch": 0.2881751329883842, "grad_norm": 0.9372220039367676, "learning_rate": 1.4245858339458657e-05, "loss": 2.705068016052246, "step": 35700 }, { "epoch": 0.28825585431415124, "grad_norm": 0.7642033100128174, "learning_rate": 1.4244242869720443e-05, "loss": 3.1869110107421874, "step": 35710 }, { "epoch": 0.2883365756399183, "grad_norm": 1.0144381523132324, "learning_rate": 1.4242627399982231e-05, "loss": 3.1968597412109374, "step": 35720 }, { "epoch": 0.28841729696568535, "grad_norm": 1.4808865785598755, "learning_rate": 1.4241011930244017e-05, "loss": 3.471403121948242, "step": 35730 }, { "epoch": 0.28849801829145244, "grad_norm": 0.6559258699417114, "learning_rate": 1.4239396460505805e-05, "loss": 3.3482147216796876, "step": 35740 }, { "epoch": 0.28857873961721947, "grad_norm": 1.07831609249115, "learning_rate": 1.423778099076759e-05, "loss": 3.365208053588867, "step": 35750 }, { "epoch": 0.2886594609429865, "grad_norm": 1.1366956233978271, "learning_rate": 1.4236165521029378e-05, "loss": 3.3397010803222655, "step": 35760 }, { "epoch": 0.2887401822687536, "grad_norm": 1.4979921579360962, "learning_rate": 1.4234550051291164e-05, "loss": 2.9631906509399415, "step": 35770 }, { "epoch": 0.2888209035945206, "grad_norm": 1.196316123008728, "learning_rate": 1.4232934581552952e-05, "loss": 3.0247751235961915, "step": 35780 }, { "epoch": 0.2889016249202877, "grad_norm": 1.4189505577087402, "learning_rate": 1.4231319111814738e-05, "loss": 3.3470211029052734, "step": 35790 }, { "epoch": 0.28898234624605473, "grad_norm": 1.3824106454849243, "learning_rate": 1.4229703642076527e-05, "loss": 3.526319885253906, "step": 35800 }, { "epoch": 0.2890630675718218, "grad_norm": 1.2494102716445923, "learning_rate": 1.4228088172338312e-05, "loss": 3.082463836669922, "step": 35810 }, { "epoch": 0.28914378889758885, "grad_norm": 0.6027686595916748, "learning_rate": 1.4226472702600101e-05, "loss": 2.934994125366211, "step": 35820 }, { "epoch": 0.28922451022335594, "grad_norm": 0.960119903087616, "learning_rate": 1.4224857232861885e-05, "loss": 3.2056827545166016, "step": 35830 }, { "epoch": 0.28930523154912297, "grad_norm": 0.866917610168457, "learning_rate": 1.4223241763123675e-05, "loss": 3.3234107971191404, "step": 35840 }, { "epoch": 0.28938595287489, "grad_norm": 1.0077825784683228, "learning_rate": 1.422162629338546e-05, "loss": 3.143813133239746, "step": 35850 }, { "epoch": 0.2894666742006571, "grad_norm": 1.1765730381011963, "learning_rate": 1.4220010823647249e-05, "loss": 3.425856018066406, "step": 35860 }, { "epoch": 0.2895473955264241, "grad_norm": 0.8043423891067505, "learning_rate": 1.4218395353909033e-05, "loss": 2.809158515930176, "step": 35870 }, { "epoch": 0.2896281168521912, "grad_norm": 1.1129043102264404, "learning_rate": 1.4216779884170822e-05, "loss": 2.9011764526367188, "step": 35880 }, { "epoch": 0.28970883817795823, "grad_norm": 0.7887258529663086, "learning_rate": 1.4215164414432607e-05, "loss": 2.7770240783691404, "step": 35890 }, { "epoch": 0.2897895595037253, "grad_norm": 0.9615687131881714, "learning_rate": 1.4213548944694396e-05, "loss": 2.727840042114258, "step": 35900 }, { "epoch": 0.28987028082949234, "grad_norm": 1.3050248622894287, "learning_rate": 1.421193347495618e-05, "loss": 3.1500642776489256, "step": 35910 }, { "epoch": 0.2899510021552594, "grad_norm": 1.862379550933838, "learning_rate": 1.421031800521797e-05, "loss": 2.8364032745361327, "step": 35920 }, { "epoch": 0.29003172348102646, "grad_norm": 0.652298092842102, "learning_rate": 1.4208702535479754e-05, "loss": 3.068897247314453, "step": 35930 }, { "epoch": 0.2901124448067935, "grad_norm": 0.7696560025215149, "learning_rate": 1.4207087065741543e-05, "loss": 3.3622051239013673, "step": 35940 }, { "epoch": 0.2901931661325606, "grad_norm": 1.551881194114685, "learning_rate": 1.4205471596003328e-05, "loss": 2.8768585205078123, "step": 35950 }, { "epoch": 0.2902738874583276, "grad_norm": 0.7150357365608215, "learning_rate": 1.4203856126265117e-05, "loss": 3.3589576721191405, "step": 35960 }, { "epoch": 0.2903546087840947, "grad_norm": 0.9271959662437439, "learning_rate": 1.4202240656526903e-05, "loss": 3.240792465209961, "step": 35970 }, { "epoch": 0.2904353301098617, "grad_norm": 0.9336526989936829, "learning_rate": 1.420062518678869e-05, "loss": 2.9354766845703124, "step": 35980 }, { "epoch": 0.29051605143562875, "grad_norm": 1.2549501657485962, "learning_rate": 1.4199009717050477e-05, "loss": 2.988231086730957, "step": 35990 }, { "epoch": 0.29059677276139584, "grad_norm": 0.83620285987854, "learning_rate": 1.4197394247312264e-05, "loss": 3.0451406478881835, "step": 36000 }, { "epoch": 0.29067749408716287, "grad_norm": 1.2311937808990479, "learning_rate": 1.419577877757405e-05, "loss": 2.9646884918212892, "step": 36010 }, { "epoch": 0.29075821541292995, "grad_norm": 0.8502562046051025, "learning_rate": 1.4194163307835838e-05, "loss": 2.8588512420654295, "step": 36020 }, { "epoch": 0.290838936738697, "grad_norm": 0.5341504216194153, "learning_rate": 1.4192547838097624e-05, "loss": 3.3103981018066406, "step": 36030 }, { "epoch": 0.29091965806446407, "grad_norm": 0.7315739989280701, "learning_rate": 1.4190932368359412e-05, "loss": 3.2867000579833983, "step": 36040 }, { "epoch": 0.2910003793902311, "grad_norm": 1.1319589614868164, "learning_rate": 1.4189316898621198e-05, "loss": 2.8455427169799803, "step": 36050 }, { "epoch": 0.2910811007159982, "grad_norm": 0.787437915802002, "learning_rate": 1.4187701428882985e-05, "loss": 3.0377780914306642, "step": 36060 }, { "epoch": 0.2911618220417652, "grad_norm": 1.08717679977417, "learning_rate": 1.4186085959144771e-05, "loss": 2.851639175415039, "step": 36070 }, { "epoch": 0.29124254336753225, "grad_norm": 1.0384641885757446, "learning_rate": 1.418447048940656e-05, "loss": 3.164501953125, "step": 36080 }, { "epoch": 0.29132326469329933, "grad_norm": 0.8371522426605225, "learning_rate": 1.4182855019668345e-05, "loss": 2.5001989364624024, "step": 36090 }, { "epoch": 0.29140398601906636, "grad_norm": 0.6368069052696228, "learning_rate": 1.4181239549930133e-05, "loss": 2.842228889465332, "step": 36100 }, { "epoch": 0.29148470734483345, "grad_norm": 0.7118096351623535, "learning_rate": 1.4179624080191919e-05, "loss": 3.1088064193725584, "step": 36110 }, { "epoch": 0.2915654286706005, "grad_norm": 1.197310447692871, "learning_rate": 1.4178008610453707e-05, "loss": 3.5539085388183596, "step": 36120 }, { "epoch": 0.29164614999636757, "grad_norm": 1.0595213174819946, "learning_rate": 1.4176393140715493e-05, "loss": 2.972927284240723, "step": 36130 }, { "epoch": 0.2917268713221346, "grad_norm": 0.8465135097503662, "learning_rate": 1.417477767097728e-05, "loss": 3.3462142944335938, "step": 36140 }, { "epoch": 0.2918075926479016, "grad_norm": 0.845618486404419, "learning_rate": 1.4173162201239066e-05, "loss": 2.674283981323242, "step": 36150 }, { "epoch": 0.2918883139736687, "grad_norm": 0.8953932523727417, "learning_rate": 1.4171546731500854e-05, "loss": 3.406109619140625, "step": 36160 }, { "epoch": 0.29196903529943574, "grad_norm": 0.8034064173698425, "learning_rate": 1.416993126176264e-05, "loss": 3.565052795410156, "step": 36170 }, { "epoch": 0.2920497566252028, "grad_norm": 1.0712387561798096, "learning_rate": 1.4168315792024428e-05, "loss": 3.0673261642456056, "step": 36180 }, { "epoch": 0.29213047795096986, "grad_norm": 1.526757836341858, "learning_rate": 1.4166700322286214e-05, "loss": 3.3436630249023436, "step": 36190 }, { "epoch": 0.29221119927673694, "grad_norm": 0.7961556911468506, "learning_rate": 1.4165084852548001e-05, "loss": 3.043319320678711, "step": 36200 }, { "epoch": 0.292291920602504, "grad_norm": 0.9235976934432983, "learning_rate": 1.4163469382809787e-05, "loss": 2.9700557708740236, "step": 36210 }, { "epoch": 0.292372641928271, "grad_norm": 0.7290084362030029, "learning_rate": 1.4161853913071575e-05, "loss": 3.513996124267578, "step": 36220 }, { "epoch": 0.2924533632540381, "grad_norm": 0.9557119011878967, "learning_rate": 1.4160238443333361e-05, "loss": 3.495235061645508, "step": 36230 }, { "epoch": 0.2925340845798051, "grad_norm": 0.8683398962020874, "learning_rate": 1.4158622973595149e-05, "loss": 3.2185718536376955, "step": 36240 }, { "epoch": 0.2926148059055722, "grad_norm": 1.0978245735168457, "learning_rate": 1.4157007503856935e-05, "loss": 3.0320919036865233, "step": 36250 }, { "epoch": 0.29269552723133924, "grad_norm": 0.8831216096878052, "learning_rate": 1.4155392034118722e-05, "loss": 3.0948463439941407, "step": 36260 }, { "epoch": 0.2927762485571063, "grad_norm": 0.9557834267616272, "learning_rate": 1.4153776564380508e-05, "loss": 3.314030075073242, "step": 36270 }, { "epoch": 0.29285696988287335, "grad_norm": 0.9624696373939514, "learning_rate": 1.4152161094642296e-05, "loss": 2.758339691162109, "step": 36280 }, { "epoch": 0.29293769120864044, "grad_norm": 0.9429270625114441, "learning_rate": 1.4150545624904082e-05, "loss": 3.620838165283203, "step": 36290 }, { "epoch": 0.29301841253440747, "grad_norm": 0.6677803993225098, "learning_rate": 1.414893015516587e-05, "loss": 2.746549606323242, "step": 36300 }, { "epoch": 0.2930991338601745, "grad_norm": 1.0268614292144775, "learning_rate": 1.4147314685427656e-05, "loss": 3.1892528533935547, "step": 36310 }, { "epoch": 0.2931798551859416, "grad_norm": 0.9077468514442444, "learning_rate": 1.4145699215689443e-05, "loss": 3.177667999267578, "step": 36320 }, { "epoch": 0.2932605765117086, "grad_norm": 1.5363742113113403, "learning_rate": 1.414408374595123e-05, "loss": 3.2870532989501955, "step": 36330 }, { "epoch": 0.2933412978374757, "grad_norm": 0.6725500226020813, "learning_rate": 1.4142468276213017e-05, "loss": 2.9077280044555662, "step": 36340 }, { "epoch": 0.29342201916324273, "grad_norm": 0.829358696937561, "learning_rate": 1.4140852806474803e-05, "loss": 2.8893962860107423, "step": 36350 }, { "epoch": 0.2935027404890098, "grad_norm": 0.9350390434265137, "learning_rate": 1.4139237336736591e-05, "loss": 3.226683807373047, "step": 36360 }, { "epoch": 0.29358346181477685, "grad_norm": 0.5711862444877625, "learning_rate": 1.4137621866998377e-05, "loss": 2.7419755935668944, "step": 36370 }, { "epoch": 0.2936641831405439, "grad_norm": 1.290592908859253, "learning_rate": 1.4136006397260165e-05, "loss": 3.0352006912231446, "step": 36380 }, { "epoch": 0.29374490446631096, "grad_norm": 0.9312356114387512, "learning_rate": 1.413439092752195e-05, "loss": 3.2085445404052733, "step": 36390 }, { "epoch": 0.293825625792078, "grad_norm": 0.6910510063171387, "learning_rate": 1.4132775457783738e-05, "loss": 2.9794900894165037, "step": 36400 }, { "epoch": 0.2939063471178451, "grad_norm": 1.046708106994629, "learning_rate": 1.4131159988045524e-05, "loss": 3.4196231842041014, "step": 36410 }, { "epoch": 0.2939870684436121, "grad_norm": 1.080678105354309, "learning_rate": 1.4129544518307312e-05, "loss": 2.711966133117676, "step": 36420 }, { "epoch": 0.2940677897693792, "grad_norm": 0.6980420351028442, "learning_rate": 1.4127929048569098e-05, "loss": 2.9922048568725588, "step": 36430 }, { "epoch": 0.2941485110951462, "grad_norm": 0.9956097602844238, "learning_rate": 1.4126313578830886e-05, "loss": 3.293873977661133, "step": 36440 }, { "epoch": 0.29422923242091326, "grad_norm": 0.9288021922111511, "learning_rate": 1.4124698109092672e-05, "loss": 3.3769588470458984, "step": 36450 }, { "epoch": 0.29430995374668034, "grad_norm": 0.9412903189659119, "learning_rate": 1.412308263935446e-05, "loss": 3.2116153717041014, "step": 36460 }, { "epoch": 0.29439067507244737, "grad_norm": 1.4095737934112549, "learning_rate": 1.4121467169616245e-05, "loss": 3.022612762451172, "step": 36470 }, { "epoch": 0.29447139639821446, "grad_norm": 0.5772213339805603, "learning_rate": 1.4119851699878033e-05, "loss": 3.221741485595703, "step": 36480 }, { "epoch": 0.2945521177239815, "grad_norm": 1.407112956047058, "learning_rate": 1.4118236230139819e-05, "loss": 2.901877784729004, "step": 36490 }, { "epoch": 0.2946328390497486, "grad_norm": 0.8361161947250366, "learning_rate": 1.4116620760401607e-05, "loss": 3.1598337173461912, "step": 36500 }, { "epoch": 0.2947135603755156, "grad_norm": 1.2579355239868164, "learning_rate": 1.4115005290663393e-05, "loss": 2.430206298828125, "step": 36510 }, { "epoch": 0.29479428170128263, "grad_norm": 0.5195139646530151, "learning_rate": 1.411338982092518e-05, "loss": 2.9441978454589846, "step": 36520 }, { "epoch": 0.2948750030270497, "grad_norm": 1.2217668294906616, "learning_rate": 1.4111774351186966e-05, "loss": 3.2302082061767576, "step": 36530 }, { "epoch": 0.29495572435281675, "grad_norm": 0.6497024893760681, "learning_rate": 1.4110158881448754e-05, "loss": 2.8506704330444337, "step": 36540 }, { "epoch": 0.29503644567858384, "grad_norm": 1.2769783735275269, "learning_rate": 1.410854341171054e-05, "loss": 2.851662445068359, "step": 36550 }, { "epoch": 0.29511716700435087, "grad_norm": 0.69865882396698, "learning_rate": 1.4106927941972328e-05, "loss": 2.8284481048583983, "step": 36560 }, { "epoch": 0.29519788833011795, "grad_norm": 1.0524240732192993, "learning_rate": 1.4105312472234114e-05, "loss": 2.9793889999389647, "step": 36570 }, { "epoch": 0.295278609655885, "grad_norm": 0.7341775298118591, "learning_rate": 1.4103697002495901e-05, "loss": 2.7621313095092774, "step": 36580 }, { "epoch": 0.29535933098165207, "grad_norm": 1.173559308052063, "learning_rate": 1.410208153275769e-05, "loss": 3.4001911163330076, "step": 36590 }, { "epoch": 0.2954400523074191, "grad_norm": 1.1155428886413574, "learning_rate": 1.4100466063019475e-05, "loss": 3.173981475830078, "step": 36600 }, { "epoch": 0.29552077363318613, "grad_norm": 1.2973304986953735, "learning_rate": 1.4098850593281263e-05, "loss": 2.645751953125, "step": 36610 }, { "epoch": 0.2956014949589532, "grad_norm": 1.107076644897461, "learning_rate": 1.4097235123543049e-05, "loss": 3.2351329803466795, "step": 36620 }, { "epoch": 0.29568221628472025, "grad_norm": 0.8258336782455444, "learning_rate": 1.4095619653804837e-05, "loss": 3.9359344482421874, "step": 36630 }, { "epoch": 0.29576293761048733, "grad_norm": 0.7925887107849121, "learning_rate": 1.4094004184066623e-05, "loss": 3.3722572326660156, "step": 36640 }, { "epoch": 0.29584365893625436, "grad_norm": 0.9044311046600342, "learning_rate": 1.409238871432841e-05, "loss": 3.3369773864746093, "step": 36650 }, { "epoch": 0.29592438026202145, "grad_norm": 0.8447734117507935, "learning_rate": 1.4090773244590196e-05, "loss": 2.884263610839844, "step": 36660 }, { "epoch": 0.2960051015877885, "grad_norm": 1.0362111330032349, "learning_rate": 1.4089157774851986e-05, "loss": 3.1153518676757814, "step": 36670 }, { "epoch": 0.2960858229135555, "grad_norm": 1.3075617551803589, "learning_rate": 1.408754230511377e-05, "loss": 3.7984973907470705, "step": 36680 }, { "epoch": 0.2961665442393226, "grad_norm": 0.9432900547981262, "learning_rate": 1.408592683537556e-05, "loss": 2.9370100021362306, "step": 36690 }, { "epoch": 0.2962472655650896, "grad_norm": 0.8878467082977295, "learning_rate": 1.4084311365637344e-05, "loss": 2.850386619567871, "step": 36700 }, { "epoch": 0.2963279868908567, "grad_norm": 1.2489705085754395, "learning_rate": 1.4082695895899133e-05, "loss": 3.084121513366699, "step": 36710 }, { "epoch": 0.29640870821662374, "grad_norm": 0.7934015989303589, "learning_rate": 1.4081080426160917e-05, "loss": 2.6523038864135744, "step": 36720 }, { "epoch": 0.2964894295423908, "grad_norm": 0.7553940415382385, "learning_rate": 1.4079464956422707e-05, "loss": 2.835515785217285, "step": 36730 }, { "epoch": 0.29657015086815786, "grad_norm": 0.8899442553520203, "learning_rate": 1.4077849486684491e-05, "loss": 2.772851753234863, "step": 36740 }, { "epoch": 0.2966508721939249, "grad_norm": 0.7097998261451721, "learning_rate": 1.407623401694628e-05, "loss": 3.169818115234375, "step": 36750 }, { "epoch": 0.29673159351969197, "grad_norm": 1.1472859382629395, "learning_rate": 1.4074618547208065e-05, "loss": 3.4028690338134764, "step": 36760 }, { "epoch": 0.296812314845459, "grad_norm": 0.7760313153266907, "learning_rate": 1.4073003077469854e-05, "loss": 3.176578140258789, "step": 36770 }, { "epoch": 0.2968930361712261, "grad_norm": 0.8303679823875427, "learning_rate": 1.4071387607731638e-05, "loss": 2.8425930023193358, "step": 36780 }, { "epoch": 0.2969737574969931, "grad_norm": 1.203418493270874, "learning_rate": 1.4069772137993428e-05, "loss": 2.8812801361083986, "step": 36790 }, { "epoch": 0.2970544788227602, "grad_norm": 0.8549426794052124, "learning_rate": 1.4068156668255212e-05, "loss": 3.129649543762207, "step": 36800 }, { "epoch": 0.29713520014852723, "grad_norm": 1.0759460926055908, "learning_rate": 1.4066541198517001e-05, "loss": 2.8620315551757813, "step": 36810 }, { "epoch": 0.2972159214742943, "grad_norm": 1.8005067110061646, "learning_rate": 1.4064925728778786e-05, "loss": 2.8948198318481446, "step": 36820 }, { "epoch": 0.29729664280006135, "grad_norm": 1.232182264328003, "learning_rate": 1.4063310259040575e-05, "loss": 3.159809684753418, "step": 36830 }, { "epoch": 0.2973773641258284, "grad_norm": 0.9064581990242004, "learning_rate": 1.4061694789302361e-05, "loss": 3.3216617584228514, "step": 36840 }, { "epoch": 0.29745808545159547, "grad_norm": 0.5457286238670349, "learning_rate": 1.4060079319564149e-05, "loss": 3.214919662475586, "step": 36850 }, { "epoch": 0.2975388067773625, "grad_norm": 1.1537612676620483, "learning_rate": 1.4058463849825935e-05, "loss": 3.3269889831542967, "step": 36860 }, { "epoch": 0.2976195281031296, "grad_norm": 0.9499176740646362, "learning_rate": 1.4056848380087723e-05, "loss": 2.849814224243164, "step": 36870 }, { "epoch": 0.2977002494288966, "grad_norm": 0.5567981600761414, "learning_rate": 1.4055232910349509e-05, "loss": 2.882090759277344, "step": 36880 }, { "epoch": 0.2977809707546637, "grad_norm": 1.7468211650848389, "learning_rate": 1.4053617440611296e-05, "loss": 3.2282581329345703, "step": 36890 }, { "epoch": 0.29786169208043073, "grad_norm": 1.4856139421463013, "learning_rate": 1.4052001970873082e-05, "loss": 2.684739685058594, "step": 36900 }, { "epoch": 0.29794241340619776, "grad_norm": 0.8873075246810913, "learning_rate": 1.405038650113487e-05, "loss": 3.248486328125, "step": 36910 }, { "epoch": 0.29802313473196484, "grad_norm": 0.6078194379806519, "learning_rate": 1.4048771031396656e-05, "loss": 2.795821952819824, "step": 36920 }, { "epoch": 0.2981038560577319, "grad_norm": 1.088034987449646, "learning_rate": 1.4047155561658444e-05, "loss": 2.7181106567382813, "step": 36930 }, { "epoch": 0.29818457738349896, "grad_norm": 0.7776758670806885, "learning_rate": 1.404554009192023e-05, "loss": 3.603213882446289, "step": 36940 }, { "epoch": 0.298265298709266, "grad_norm": 0.7819722294807434, "learning_rate": 1.4043924622182017e-05, "loss": 2.944062042236328, "step": 36950 }, { "epoch": 0.2983460200350331, "grad_norm": 0.7113257646560669, "learning_rate": 1.4042309152443803e-05, "loss": 3.109380531311035, "step": 36960 }, { "epoch": 0.2984267413608001, "grad_norm": 1.2696465253829956, "learning_rate": 1.4040693682705591e-05, "loss": 2.8733049392700196, "step": 36970 }, { "epoch": 0.29850746268656714, "grad_norm": 0.6802255511283875, "learning_rate": 1.4039078212967377e-05, "loss": 3.115769386291504, "step": 36980 }, { "epoch": 0.2985881840123342, "grad_norm": 1.379162311553955, "learning_rate": 1.4037462743229165e-05, "loss": 3.2844329833984376, "step": 36990 }, { "epoch": 0.29866890533810125, "grad_norm": 0.6422668099403381, "learning_rate": 1.403584727349095e-05, "loss": 3.298221206665039, "step": 37000 }, { "epoch": 0.29874962666386834, "grad_norm": 0.7208858728408813, "learning_rate": 1.4034231803752738e-05, "loss": 3.512166976928711, "step": 37010 }, { "epoch": 0.29883034798963537, "grad_norm": 1.6586414575576782, "learning_rate": 1.4032616334014524e-05, "loss": 3.0566600799560546, "step": 37020 }, { "epoch": 0.29891106931540246, "grad_norm": 1.044025182723999, "learning_rate": 1.4031000864276312e-05, "loss": 3.155028533935547, "step": 37030 }, { "epoch": 0.2989917906411695, "grad_norm": 0.8345552682876587, "learning_rate": 1.4029385394538098e-05, "loss": 2.958564567565918, "step": 37040 }, { "epoch": 0.29907251196693657, "grad_norm": 1.2914087772369385, "learning_rate": 1.4027769924799886e-05, "loss": 3.196456718444824, "step": 37050 }, { "epoch": 0.2991532332927036, "grad_norm": 0.6043474674224854, "learning_rate": 1.4026154455061672e-05, "loss": 2.7437686920166016, "step": 37060 }, { "epoch": 0.29923395461847063, "grad_norm": 0.8248097896575928, "learning_rate": 1.402453898532346e-05, "loss": 2.6682451248168944, "step": 37070 }, { "epoch": 0.2993146759442377, "grad_norm": 1.0839475393295288, "learning_rate": 1.4022923515585245e-05, "loss": 2.9293880462646484, "step": 37080 }, { "epoch": 0.29939539727000475, "grad_norm": 1.1798938512802124, "learning_rate": 1.4021308045847033e-05, "loss": 3.31168212890625, "step": 37090 }, { "epoch": 0.29947611859577183, "grad_norm": 1.1213312149047852, "learning_rate": 1.4019692576108819e-05, "loss": 3.2240711212158204, "step": 37100 }, { "epoch": 0.29955683992153886, "grad_norm": 1.9136947393417358, "learning_rate": 1.4018077106370607e-05, "loss": 3.315367889404297, "step": 37110 }, { "epoch": 0.29963756124730595, "grad_norm": 0.9650248885154724, "learning_rate": 1.4016461636632393e-05, "loss": 2.988747787475586, "step": 37120 }, { "epoch": 0.299718282573073, "grad_norm": 0.5614321827888489, "learning_rate": 1.401484616689418e-05, "loss": 3.1908178329467773, "step": 37130 }, { "epoch": 0.29979900389884, "grad_norm": 1.4557595252990723, "learning_rate": 1.4013230697155967e-05, "loss": 2.5756658554077148, "step": 37140 }, { "epoch": 0.2998797252246071, "grad_norm": 0.9835056066513062, "learning_rate": 1.4011615227417754e-05, "loss": 3.080559730529785, "step": 37150 }, { "epoch": 0.2999604465503741, "grad_norm": 0.8559861183166504, "learning_rate": 1.400999975767954e-05, "loss": 2.8499345779418945, "step": 37160 }, { "epoch": 0.3000411678761412, "grad_norm": 1.1314094066619873, "learning_rate": 1.4008384287941328e-05, "loss": 3.288448715209961, "step": 37170 }, { "epoch": 0.30012188920190824, "grad_norm": 0.8357308506965637, "learning_rate": 1.4006768818203114e-05, "loss": 3.0519683837890623, "step": 37180 }, { "epoch": 0.30020261052767533, "grad_norm": 1.011196494102478, "learning_rate": 1.4005153348464902e-05, "loss": 2.892414665222168, "step": 37190 }, { "epoch": 0.30028333185344236, "grad_norm": 1.0416148900985718, "learning_rate": 1.4003537878726688e-05, "loss": 2.843448829650879, "step": 37200 }, { "epoch": 0.3003640531792094, "grad_norm": 1.052011489868164, "learning_rate": 1.4001922408988475e-05, "loss": 3.3292057037353517, "step": 37210 }, { "epoch": 0.3004447745049765, "grad_norm": 0.814655601978302, "learning_rate": 1.4000306939250261e-05, "loss": 3.4623302459716796, "step": 37220 }, { "epoch": 0.3005254958307435, "grad_norm": 0.9931538701057434, "learning_rate": 1.3998691469512049e-05, "loss": 3.145778846740723, "step": 37230 }, { "epoch": 0.3006062171565106, "grad_norm": 1.1552927494049072, "learning_rate": 1.3997075999773835e-05, "loss": 3.226858139038086, "step": 37240 }, { "epoch": 0.3006869384822776, "grad_norm": 0.7656985521316528, "learning_rate": 1.3995460530035623e-05, "loss": 2.9612262725830076, "step": 37250 }, { "epoch": 0.3007676598080447, "grad_norm": 1.4075045585632324, "learning_rate": 1.3993845060297409e-05, "loss": 3.0911933898925783, "step": 37260 }, { "epoch": 0.30084838113381174, "grad_norm": 1.6165649890899658, "learning_rate": 1.3992229590559196e-05, "loss": 3.0266912460327147, "step": 37270 }, { "epoch": 0.3009291024595788, "grad_norm": 1.2482142448425293, "learning_rate": 1.3990614120820982e-05, "loss": 3.4077068328857423, "step": 37280 }, { "epoch": 0.30100982378534585, "grad_norm": 0.9384520649909973, "learning_rate": 1.398899865108277e-05, "loss": 2.8843757629394533, "step": 37290 }, { "epoch": 0.3010905451111129, "grad_norm": 1.3730565309524536, "learning_rate": 1.3987383181344556e-05, "loss": 3.031009292602539, "step": 37300 }, { "epoch": 0.30117126643687997, "grad_norm": 1.073917031288147, "learning_rate": 1.3985767711606344e-05, "loss": 3.0988800048828127, "step": 37310 }, { "epoch": 0.301251987762647, "grad_norm": 0.4628409147262573, "learning_rate": 1.398415224186813e-05, "loss": 2.882817840576172, "step": 37320 }, { "epoch": 0.3013327090884141, "grad_norm": 0.7591283321380615, "learning_rate": 1.3982536772129917e-05, "loss": 2.866041374206543, "step": 37330 }, { "epoch": 0.3014134304141811, "grad_norm": 1.1980783939361572, "learning_rate": 1.3980921302391703e-05, "loss": 3.1600027084350586, "step": 37340 }, { "epoch": 0.3014941517399482, "grad_norm": 0.8375239968299866, "learning_rate": 1.3979305832653491e-05, "loss": 3.0479558944702148, "step": 37350 }, { "epoch": 0.30157487306571523, "grad_norm": 1.7862578630447388, "learning_rate": 1.3977690362915277e-05, "loss": 3.17032527923584, "step": 37360 }, { "epoch": 0.30165559439148226, "grad_norm": 1.930547833442688, "learning_rate": 1.3976074893177065e-05, "loss": 3.715282440185547, "step": 37370 }, { "epoch": 0.30173631571724935, "grad_norm": 0.7553117871284485, "learning_rate": 1.397445942343885e-05, "loss": 3.065524864196777, "step": 37380 }, { "epoch": 0.3018170370430164, "grad_norm": 1.0073466300964355, "learning_rate": 1.3972843953700639e-05, "loss": 3.166200065612793, "step": 37390 }, { "epoch": 0.30189775836878346, "grad_norm": 0.7612374424934387, "learning_rate": 1.3971228483962425e-05, "loss": 2.8612350463867187, "step": 37400 }, { "epoch": 0.3019784796945505, "grad_norm": 1.0367180109024048, "learning_rate": 1.3969613014224212e-05, "loss": 3.2091438293457033, "step": 37410 }, { "epoch": 0.3020592010203176, "grad_norm": 1.0496717691421509, "learning_rate": 1.3967997544485998e-05, "loss": 3.4197616577148438, "step": 37420 }, { "epoch": 0.3021399223460846, "grad_norm": 0.6496586799621582, "learning_rate": 1.3966382074747786e-05, "loss": 2.986635208129883, "step": 37430 }, { "epoch": 0.30222064367185164, "grad_norm": 2.075277805328369, "learning_rate": 1.3964766605009572e-05, "loss": 2.9995695114135743, "step": 37440 }, { "epoch": 0.3023013649976187, "grad_norm": 0.7889974117279053, "learning_rate": 1.396315113527136e-05, "loss": 2.715464782714844, "step": 37450 }, { "epoch": 0.30238208632338576, "grad_norm": 0.9090296626091003, "learning_rate": 1.3961535665533146e-05, "loss": 2.9040082931518554, "step": 37460 }, { "epoch": 0.30246280764915284, "grad_norm": 0.722518801689148, "learning_rate": 1.3959920195794933e-05, "loss": 3.2315322875976564, "step": 37470 }, { "epoch": 0.3025435289749199, "grad_norm": 1.4352474212646484, "learning_rate": 1.395830472605672e-05, "loss": 2.6458515167236327, "step": 37480 }, { "epoch": 0.30262425030068696, "grad_norm": 0.7644381523132324, "learning_rate": 1.3956689256318507e-05, "loss": 2.8876708984375, "step": 37490 }, { "epoch": 0.302704971626454, "grad_norm": 1.1952356100082397, "learning_rate": 1.3955073786580293e-05, "loss": 2.816889762878418, "step": 37500 }, { "epoch": 0.3027856929522211, "grad_norm": 0.6313610672950745, "learning_rate": 1.395345831684208e-05, "loss": 2.477198028564453, "step": 37510 }, { "epoch": 0.3028664142779881, "grad_norm": 0.7498537302017212, "learning_rate": 1.3951842847103867e-05, "loss": 3.3538326263427733, "step": 37520 }, { "epoch": 0.30294713560375514, "grad_norm": 1.0667765140533447, "learning_rate": 1.3950227377365654e-05, "loss": 3.6081573486328127, "step": 37530 }, { "epoch": 0.3030278569295222, "grad_norm": 1.1675491333007812, "learning_rate": 1.394861190762744e-05, "loss": 3.056879997253418, "step": 37540 }, { "epoch": 0.30310857825528925, "grad_norm": 1.0336805582046509, "learning_rate": 1.3946996437889228e-05, "loss": 3.2764530181884766, "step": 37550 }, { "epoch": 0.30318929958105634, "grad_norm": 0.6120790243148804, "learning_rate": 1.3945380968151014e-05, "loss": 2.785676956176758, "step": 37560 }, { "epoch": 0.30327002090682337, "grad_norm": 0.7164564728736877, "learning_rate": 1.3943765498412802e-05, "loss": 2.758783721923828, "step": 37570 }, { "epoch": 0.30335074223259045, "grad_norm": 1.7096564769744873, "learning_rate": 1.3942150028674588e-05, "loss": 3.3449058532714844, "step": 37580 }, { "epoch": 0.3034314635583575, "grad_norm": 1.0821712017059326, "learning_rate": 1.3940534558936375e-05, "loss": 3.1765668869018553, "step": 37590 }, { "epoch": 0.3035121848841245, "grad_norm": 1.0188031196594238, "learning_rate": 1.3938919089198161e-05, "loss": 3.3056690216064455, "step": 37600 }, { "epoch": 0.3035929062098916, "grad_norm": 1.0462208986282349, "learning_rate": 1.3937303619459949e-05, "loss": 2.7857135772705077, "step": 37610 }, { "epoch": 0.30367362753565863, "grad_norm": 1.197706937789917, "learning_rate": 1.3935688149721735e-05, "loss": 2.984964942932129, "step": 37620 }, { "epoch": 0.3037543488614257, "grad_norm": 1.2061220407485962, "learning_rate": 1.3934072679983523e-05, "loss": 2.973055839538574, "step": 37630 }, { "epoch": 0.30383507018719275, "grad_norm": 1.1160192489624023, "learning_rate": 1.3932457210245309e-05, "loss": 3.0884984970092773, "step": 37640 }, { "epoch": 0.30391579151295983, "grad_norm": 1.667346715927124, "learning_rate": 1.3930841740507096e-05, "loss": 3.0970029830932617, "step": 37650 }, { "epoch": 0.30399651283872686, "grad_norm": 0.738175094127655, "learning_rate": 1.3929226270768882e-05, "loss": 2.7781349182128907, "step": 37660 }, { "epoch": 0.3040772341644939, "grad_norm": 0.6879768967628479, "learning_rate": 1.392761080103067e-05, "loss": 3.1146541595458985, "step": 37670 }, { "epoch": 0.304157955490261, "grad_norm": 1.3877955675125122, "learning_rate": 1.3925995331292456e-05, "loss": 2.9749237060546876, "step": 37680 }, { "epoch": 0.304238676816028, "grad_norm": 1.1263660192489624, "learning_rate": 1.3924379861554244e-05, "loss": 2.840692138671875, "step": 37690 }, { "epoch": 0.3043193981417951, "grad_norm": 0.8768396377563477, "learning_rate": 1.392276439181603e-05, "loss": 2.9411170959472654, "step": 37700 }, { "epoch": 0.3044001194675621, "grad_norm": 1.3693749904632568, "learning_rate": 1.392114892207782e-05, "loss": 3.2722709655761717, "step": 37710 }, { "epoch": 0.3044808407933292, "grad_norm": 0.8197869062423706, "learning_rate": 1.3919533452339604e-05, "loss": 3.089084434509277, "step": 37720 }, { "epoch": 0.30456156211909624, "grad_norm": 1.0780789852142334, "learning_rate": 1.3917917982601393e-05, "loss": 3.099598693847656, "step": 37730 }, { "epoch": 0.30464228344486327, "grad_norm": 1.1746143102645874, "learning_rate": 1.3916302512863177e-05, "loss": 2.988772964477539, "step": 37740 }, { "epoch": 0.30472300477063036, "grad_norm": 0.7488767504692078, "learning_rate": 1.3914687043124967e-05, "loss": 3.7035003662109376, "step": 37750 }, { "epoch": 0.3048037260963974, "grad_norm": 1.049401044845581, "learning_rate": 1.3913071573386751e-05, "loss": 3.0506923675537108, "step": 37760 }, { "epoch": 0.3048844474221645, "grad_norm": 0.9206780791282654, "learning_rate": 1.391145610364854e-05, "loss": 2.8026525497436525, "step": 37770 }, { "epoch": 0.3049651687479315, "grad_norm": 2.3959097862243652, "learning_rate": 1.3909840633910325e-05, "loss": 3.990414047241211, "step": 37780 }, { "epoch": 0.3050458900736986, "grad_norm": 1.1862260103225708, "learning_rate": 1.3908225164172114e-05, "loss": 3.3289039611816404, "step": 37790 }, { "epoch": 0.3051266113994656, "grad_norm": 0.9061155915260315, "learning_rate": 1.3906609694433898e-05, "loss": 2.7901145935058596, "step": 37800 }, { "epoch": 0.3052073327252327, "grad_norm": 0.8274844288825989, "learning_rate": 1.3904994224695688e-05, "loss": 2.847774124145508, "step": 37810 }, { "epoch": 0.30528805405099974, "grad_norm": 1.0415151119232178, "learning_rate": 1.3903378754957472e-05, "loss": 2.6849136352539062, "step": 37820 }, { "epoch": 0.30536877537676677, "grad_norm": 0.975945770740509, "learning_rate": 1.3901763285219261e-05, "loss": 3.28056640625, "step": 37830 }, { "epoch": 0.30544949670253385, "grad_norm": 1.0222342014312744, "learning_rate": 1.3900147815481049e-05, "loss": 2.9042993545532227, "step": 37840 }, { "epoch": 0.3055302180283009, "grad_norm": 0.9698636531829834, "learning_rate": 1.3898532345742835e-05, "loss": 2.9529550552368162, "step": 37850 }, { "epoch": 0.30561093935406797, "grad_norm": 0.9903438091278076, "learning_rate": 1.3896916876004623e-05, "loss": 2.9747934341430664, "step": 37860 }, { "epoch": 0.305691660679835, "grad_norm": 0.7770512700080872, "learning_rate": 1.3895301406266409e-05, "loss": 3.034402275085449, "step": 37870 }, { "epoch": 0.3057723820056021, "grad_norm": 0.9229573011398315, "learning_rate": 1.3893685936528196e-05, "loss": 2.809845542907715, "step": 37880 }, { "epoch": 0.3058531033313691, "grad_norm": 0.8061956763267517, "learning_rate": 1.3892070466789982e-05, "loss": 3.074512481689453, "step": 37890 }, { "epoch": 0.30593382465713614, "grad_norm": 0.8438577651977539, "learning_rate": 1.389045499705177e-05, "loss": 3.215931701660156, "step": 37900 }, { "epoch": 0.30601454598290323, "grad_norm": 0.7183136940002441, "learning_rate": 1.3888839527313556e-05, "loss": 3.093160057067871, "step": 37910 }, { "epoch": 0.30609526730867026, "grad_norm": 1.0210084915161133, "learning_rate": 1.3887224057575344e-05, "loss": 2.8969017028808595, "step": 37920 }, { "epoch": 0.30617598863443735, "grad_norm": 1.4984835386276245, "learning_rate": 1.388560858783713e-05, "loss": 2.5966577529907227, "step": 37930 }, { "epoch": 0.3062567099602044, "grad_norm": 1.190748929977417, "learning_rate": 1.3883993118098918e-05, "loss": 2.719929313659668, "step": 37940 }, { "epoch": 0.30633743128597146, "grad_norm": 0.8386756181716919, "learning_rate": 1.3882377648360704e-05, "loss": 3.0380844116210937, "step": 37950 }, { "epoch": 0.3064181526117385, "grad_norm": 0.7071795463562012, "learning_rate": 1.3880762178622491e-05, "loss": 2.9253522872924806, "step": 37960 }, { "epoch": 0.3064988739375055, "grad_norm": 1.2364519834518433, "learning_rate": 1.3879146708884277e-05, "loss": 2.9811578750610352, "step": 37970 }, { "epoch": 0.3065795952632726, "grad_norm": 0.6796515583992004, "learning_rate": 1.3877531239146065e-05, "loss": 2.707174873352051, "step": 37980 }, { "epoch": 0.30666031658903964, "grad_norm": 0.8611869215965271, "learning_rate": 1.3875915769407851e-05, "loss": 3.000249481201172, "step": 37990 }, { "epoch": 0.3067410379148067, "grad_norm": 0.7661072611808777, "learning_rate": 1.3874300299669639e-05, "loss": 2.7406242370605467, "step": 38000 }, { "epoch": 0.30682175924057375, "grad_norm": 0.9568239450454712, "learning_rate": 1.3872684829931425e-05, "loss": 3.09749813079834, "step": 38010 }, { "epoch": 0.30690248056634084, "grad_norm": 0.9113648533821106, "learning_rate": 1.3871069360193212e-05, "loss": 3.146090507507324, "step": 38020 }, { "epoch": 0.30698320189210787, "grad_norm": 0.9123647809028625, "learning_rate": 1.3869453890454998e-05, "loss": 2.7341785430908203, "step": 38030 }, { "epoch": 0.30706392321787496, "grad_norm": 0.5121285915374756, "learning_rate": 1.3867838420716786e-05, "loss": 2.93427734375, "step": 38040 }, { "epoch": 0.307144644543642, "grad_norm": 1.0450307130813599, "learning_rate": 1.3866222950978572e-05, "loss": 3.0971269607543945, "step": 38050 }, { "epoch": 0.307225365869409, "grad_norm": 0.8981051445007324, "learning_rate": 1.386460748124036e-05, "loss": 3.090351104736328, "step": 38060 }, { "epoch": 0.3073060871951761, "grad_norm": 0.7343315482139587, "learning_rate": 1.3862992011502146e-05, "loss": 2.784076690673828, "step": 38070 }, { "epoch": 0.30738680852094313, "grad_norm": 1.8087093830108643, "learning_rate": 1.3861376541763933e-05, "loss": 3.728257751464844, "step": 38080 }, { "epoch": 0.3074675298467102, "grad_norm": 1.0214779376983643, "learning_rate": 1.385976107202572e-05, "loss": 3.1765756607055664, "step": 38090 }, { "epoch": 0.30754825117247725, "grad_norm": 0.664914608001709, "learning_rate": 1.3858145602287507e-05, "loss": 2.875860595703125, "step": 38100 }, { "epoch": 0.30762897249824434, "grad_norm": 1.1993614435195923, "learning_rate": 1.3856530132549293e-05, "loss": 2.866816520690918, "step": 38110 }, { "epoch": 0.30770969382401137, "grad_norm": 0.9699025750160217, "learning_rate": 1.385491466281108e-05, "loss": 2.667897605895996, "step": 38120 }, { "epoch": 0.3077904151497784, "grad_norm": 1.4371960163116455, "learning_rate": 1.3853299193072867e-05, "loss": 2.454501914978027, "step": 38130 }, { "epoch": 0.3078711364755455, "grad_norm": 1.0769548416137695, "learning_rate": 1.3851683723334654e-05, "loss": 2.815547561645508, "step": 38140 }, { "epoch": 0.3079518578013125, "grad_norm": 1.322243332862854, "learning_rate": 1.385006825359644e-05, "loss": 2.7328033447265625, "step": 38150 }, { "epoch": 0.3080325791270796, "grad_norm": 0.6935000419616699, "learning_rate": 1.3848452783858228e-05, "loss": 3.0815349578857423, "step": 38160 }, { "epoch": 0.30811330045284663, "grad_norm": 0.9722278118133545, "learning_rate": 1.3846837314120014e-05, "loss": 2.944605255126953, "step": 38170 }, { "epoch": 0.3081940217786137, "grad_norm": 0.7981176376342773, "learning_rate": 1.3845221844381802e-05, "loss": 2.955804443359375, "step": 38180 }, { "epoch": 0.30827474310438074, "grad_norm": 0.9031743407249451, "learning_rate": 1.3843606374643588e-05, "loss": 2.974293327331543, "step": 38190 }, { "epoch": 0.3083554644301478, "grad_norm": 1.019623875617981, "learning_rate": 1.3841990904905376e-05, "loss": 2.7726924896240233, "step": 38200 }, { "epoch": 0.30843618575591486, "grad_norm": 1.360242486000061, "learning_rate": 1.3840375435167162e-05, "loss": 3.0977949142456054, "step": 38210 }, { "epoch": 0.3085169070816819, "grad_norm": 2.496701955795288, "learning_rate": 1.383875996542895e-05, "loss": 3.105373764038086, "step": 38220 }, { "epoch": 0.308597628407449, "grad_norm": 1.2308229207992554, "learning_rate": 1.3837144495690735e-05, "loss": 2.849897575378418, "step": 38230 }, { "epoch": 0.308678349733216, "grad_norm": 0.9285712838172913, "learning_rate": 1.3835529025952523e-05, "loss": 2.696438789367676, "step": 38240 }, { "epoch": 0.3087590710589831, "grad_norm": 0.7857763767242432, "learning_rate": 1.3833913556214309e-05, "loss": 2.73116455078125, "step": 38250 }, { "epoch": 0.3088397923847501, "grad_norm": 1.0778533220291138, "learning_rate": 1.3832298086476097e-05, "loss": 3.0007087707519533, "step": 38260 }, { "epoch": 0.3089205137105172, "grad_norm": 0.7566224336624146, "learning_rate": 1.3830682616737883e-05, "loss": 3.178632354736328, "step": 38270 }, { "epoch": 0.30900123503628424, "grad_norm": 0.8436128497123718, "learning_rate": 1.382906714699967e-05, "loss": 2.970941352844238, "step": 38280 }, { "epoch": 0.30908195636205127, "grad_norm": 0.9970106482505798, "learning_rate": 1.3827451677261456e-05, "loss": 3.0206438064575196, "step": 38290 }, { "epoch": 0.30916267768781835, "grad_norm": 0.7843144536018372, "learning_rate": 1.3825836207523244e-05, "loss": 2.687761688232422, "step": 38300 }, { "epoch": 0.3092433990135854, "grad_norm": 1.0353988409042358, "learning_rate": 1.382422073778503e-05, "loss": 3.381633758544922, "step": 38310 }, { "epoch": 0.30932412033935247, "grad_norm": 0.854710578918457, "learning_rate": 1.3822605268046818e-05, "loss": 3.0745189666748045, "step": 38320 }, { "epoch": 0.3094048416651195, "grad_norm": 1.1561306715011597, "learning_rate": 1.3820989798308604e-05, "loss": 3.13687744140625, "step": 38330 }, { "epoch": 0.3094855629908866, "grad_norm": 0.5803250670433044, "learning_rate": 1.3819374328570391e-05, "loss": 3.0592952728271485, "step": 38340 }, { "epoch": 0.3095662843166536, "grad_norm": 0.9173755049705505, "learning_rate": 1.3817758858832177e-05, "loss": 3.194839859008789, "step": 38350 }, { "epoch": 0.30964700564242065, "grad_norm": 1.1272915601730347, "learning_rate": 1.3816143389093965e-05, "loss": 3.647675323486328, "step": 38360 }, { "epoch": 0.30972772696818773, "grad_norm": 1.081626057624817, "learning_rate": 1.3814527919355751e-05, "loss": 2.8593891143798826, "step": 38370 }, { "epoch": 0.30980844829395476, "grad_norm": 1.0253560543060303, "learning_rate": 1.3812912449617539e-05, "loss": 2.824288749694824, "step": 38380 }, { "epoch": 0.30988916961972185, "grad_norm": 1.6560137271881104, "learning_rate": 1.3811296979879325e-05, "loss": 3.6355018615722656, "step": 38390 }, { "epoch": 0.3099698909454889, "grad_norm": 0.7585424184799194, "learning_rate": 1.3809681510141112e-05, "loss": 2.775094413757324, "step": 38400 }, { "epoch": 0.31005061227125597, "grad_norm": 1.6341147422790527, "learning_rate": 1.3808066040402898e-05, "loss": 3.5688987731933595, "step": 38410 }, { "epoch": 0.310131333597023, "grad_norm": 1.4932705163955688, "learning_rate": 1.3806450570664686e-05, "loss": 3.2508865356445313, "step": 38420 }, { "epoch": 0.31021205492279, "grad_norm": 0.6171879172325134, "learning_rate": 1.3804835100926472e-05, "loss": 2.858596420288086, "step": 38430 }, { "epoch": 0.3102927762485571, "grad_norm": 1.0923775434494019, "learning_rate": 1.380321963118826e-05, "loss": 2.750462532043457, "step": 38440 }, { "epoch": 0.31037349757432414, "grad_norm": 0.9390407800674438, "learning_rate": 1.3801604161450046e-05, "loss": 2.9897781372070313, "step": 38450 }, { "epoch": 0.31045421890009123, "grad_norm": 1.4608445167541504, "learning_rate": 1.3799988691711834e-05, "loss": 2.958143424987793, "step": 38460 }, { "epoch": 0.31053494022585826, "grad_norm": 0.7991313934326172, "learning_rate": 1.379837322197362e-05, "loss": 2.8571372985839845, "step": 38470 }, { "epoch": 0.31061566155162534, "grad_norm": 1.31078040599823, "learning_rate": 1.3796757752235407e-05, "loss": 3.0101165771484375, "step": 38480 }, { "epoch": 0.3106963828773924, "grad_norm": 0.9547271132469177, "learning_rate": 1.3795142282497193e-05, "loss": 2.934846305847168, "step": 38490 }, { "epoch": 0.31077710420315946, "grad_norm": 1.248809576034546, "learning_rate": 1.3793526812758981e-05, "loss": 2.6498128890991213, "step": 38500 }, { "epoch": 0.3108578255289265, "grad_norm": 2.1421995162963867, "learning_rate": 1.3791911343020767e-05, "loss": 3.0612371444702147, "step": 38510 }, { "epoch": 0.3109385468546935, "grad_norm": 0.9704676270484924, "learning_rate": 1.3790295873282555e-05, "loss": 2.8719879150390626, "step": 38520 }, { "epoch": 0.3110192681804606, "grad_norm": 0.6546134352684021, "learning_rate": 1.378868040354434e-05, "loss": 2.8810136795043944, "step": 38530 }, { "epoch": 0.31109998950622764, "grad_norm": 0.7513318657875061, "learning_rate": 1.3787064933806128e-05, "loss": 2.8656654357910156, "step": 38540 }, { "epoch": 0.3111807108319947, "grad_norm": 1.0258405208587646, "learning_rate": 1.3785449464067914e-05, "loss": 3.1230710983276366, "step": 38550 }, { "epoch": 0.31126143215776175, "grad_norm": 1.1281704902648926, "learning_rate": 1.3783833994329702e-05, "loss": 3.0577524185180662, "step": 38560 }, { "epoch": 0.31134215348352884, "grad_norm": 0.6209853291511536, "learning_rate": 1.3782218524591488e-05, "loss": 3.126865577697754, "step": 38570 }, { "epoch": 0.31142287480929587, "grad_norm": 1.005876064300537, "learning_rate": 1.3780603054853277e-05, "loss": 2.889740753173828, "step": 38580 }, { "epoch": 0.3115035961350629, "grad_norm": 1.0361305475234985, "learning_rate": 1.3778987585115062e-05, "loss": 2.8380218505859376, "step": 38590 }, { "epoch": 0.31158431746083, "grad_norm": 1.1980684995651245, "learning_rate": 1.3777372115376851e-05, "loss": 3.411922836303711, "step": 38600 }, { "epoch": 0.311665038786597, "grad_norm": 1.2178794145584106, "learning_rate": 1.3775756645638635e-05, "loss": 2.84067440032959, "step": 38610 }, { "epoch": 0.3117457601123641, "grad_norm": 0.802634596824646, "learning_rate": 1.3774141175900425e-05, "loss": 3.283924865722656, "step": 38620 }, { "epoch": 0.31182648143813113, "grad_norm": 1.0469486713409424, "learning_rate": 1.3772525706162209e-05, "loss": 3.2806201934814454, "step": 38630 }, { "epoch": 0.3119072027638982, "grad_norm": 1.1400562524795532, "learning_rate": 1.3770910236423998e-05, "loss": 3.172462272644043, "step": 38640 }, { "epoch": 0.31198792408966525, "grad_norm": 0.9036402106285095, "learning_rate": 1.3769294766685783e-05, "loss": 3.213899612426758, "step": 38650 }, { "epoch": 0.3120686454154323, "grad_norm": 1.0146887302398682, "learning_rate": 1.3767679296947572e-05, "loss": 2.8112001419067383, "step": 38660 }, { "epoch": 0.31214936674119936, "grad_norm": 0.8430165648460388, "learning_rate": 1.3766063827209356e-05, "loss": 3.674312973022461, "step": 38670 }, { "epoch": 0.3122300880669664, "grad_norm": 0.9248590469360352, "learning_rate": 1.3764448357471146e-05, "loss": 3.190346527099609, "step": 38680 }, { "epoch": 0.3123108093927335, "grad_norm": 1.1995660066604614, "learning_rate": 1.376283288773293e-05, "loss": 3.0078542709350584, "step": 38690 }, { "epoch": 0.3123915307185005, "grad_norm": 1.0929421186447144, "learning_rate": 1.376121741799472e-05, "loss": 3.559967803955078, "step": 38700 }, { "epoch": 0.3124722520442676, "grad_norm": 4.888638019561768, "learning_rate": 1.3759601948256504e-05, "loss": 3.2208816528320314, "step": 38710 }, { "epoch": 0.3125529733700346, "grad_norm": 0.8850824236869812, "learning_rate": 1.3757986478518293e-05, "loss": 2.9565217971801756, "step": 38720 }, { "epoch": 0.3126336946958017, "grad_norm": 1.140194296836853, "learning_rate": 1.3756371008780078e-05, "loss": 2.7063241958618165, "step": 38730 }, { "epoch": 0.31271441602156874, "grad_norm": 1.1720705032348633, "learning_rate": 1.3754755539041867e-05, "loss": 3.218842697143555, "step": 38740 }, { "epoch": 0.31279513734733577, "grad_norm": 1.4950658082962036, "learning_rate": 1.3753140069303653e-05, "loss": 2.811758804321289, "step": 38750 }, { "epoch": 0.31287585867310286, "grad_norm": 0.762399435043335, "learning_rate": 1.375152459956544e-05, "loss": 3.5944660186767576, "step": 38760 }, { "epoch": 0.3129565799988699, "grad_norm": 1.0922638177871704, "learning_rate": 1.3749909129827227e-05, "loss": 3.7794635772705076, "step": 38770 }, { "epoch": 0.313037301324637, "grad_norm": 0.7638145089149475, "learning_rate": 1.3748293660089014e-05, "loss": 2.8718698501586912, "step": 38780 }, { "epoch": 0.313118022650404, "grad_norm": 2.0744001865386963, "learning_rate": 1.37466781903508e-05, "loss": 3.856058120727539, "step": 38790 }, { "epoch": 0.3131987439761711, "grad_norm": 1.046366810798645, "learning_rate": 1.3745062720612588e-05, "loss": 2.975228118896484, "step": 38800 }, { "epoch": 0.3132794653019381, "grad_norm": 0.71848464012146, "learning_rate": 1.3743447250874374e-05, "loss": 2.649436378479004, "step": 38810 }, { "epoch": 0.31336018662770515, "grad_norm": 0.7667532563209534, "learning_rate": 1.3741831781136162e-05, "loss": 2.9007923126220705, "step": 38820 }, { "epoch": 0.31344090795347224, "grad_norm": 0.9682435393333435, "learning_rate": 1.3740216311397948e-05, "loss": 2.986787796020508, "step": 38830 }, { "epoch": 0.31352162927923927, "grad_norm": 0.8778111338615417, "learning_rate": 1.3738600841659735e-05, "loss": 2.7640804290771483, "step": 38840 }, { "epoch": 0.31360235060500635, "grad_norm": 1.0925283432006836, "learning_rate": 1.3736985371921521e-05, "loss": 3.2989959716796875, "step": 38850 }, { "epoch": 0.3136830719307734, "grad_norm": 0.983860969543457, "learning_rate": 1.3735369902183309e-05, "loss": 3.0841190338134767, "step": 38860 }, { "epoch": 0.31376379325654047, "grad_norm": 0.8181291222572327, "learning_rate": 1.3733754432445095e-05, "loss": 2.7510583877563475, "step": 38870 }, { "epoch": 0.3138445145823075, "grad_norm": 0.7978771924972534, "learning_rate": 1.3732138962706883e-05, "loss": 3.2665771484375, "step": 38880 }, { "epoch": 0.31392523590807453, "grad_norm": 0.9507758617401123, "learning_rate": 1.3730523492968669e-05, "loss": 2.894801712036133, "step": 38890 }, { "epoch": 0.3140059572338416, "grad_norm": 0.8452838063240051, "learning_rate": 1.3728908023230456e-05, "loss": 3.3811904907226564, "step": 38900 }, { "epoch": 0.31408667855960865, "grad_norm": 1.928096890449524, "learning_rate": 1.3727292553492242e-05, "loss": 2.9712968826293946, "step": 38910 }, { "epoch": 0.31416739988537573, "grad_norm": 0.9105219841003418, "learning_rate": 1.372567708375403e-05, "loss": 3.1622364044189455, "step": 38920 }, { "epoch": 0.31424812121114276, "grad_norm": 0.8714613914489746, "learning_rate": 1.3724061614015816e-05, "loss": 2.76653995513916, "step": 38930 }, { "epoch": 0.31432884253690985, "grad_norm": 0.8860781192779541, "learning_rate": 1.3722446144277604e-05, "loss": 3.564148712158203, "step": 38940 }, { "epoch": 0.3144095638626769, "grad_norm": 0.8960681557655334, "learning_rate": 1.372083067453939e-05, "loss": 2.9061304092407227, "step": 38950 }, { "epoch": 0.3144902851884439, "grad_norm": 0.8331891298294067, "learning_rate": 1.3719215204801178e-05, "loss": 3.0533246994018555, "step": 38960 }, { "epoch": 0.314571006514211, "grad_norm": 0.8606890439987183, "learning_rate": 1.3717599735062964e-05, "loss": 3.524142837524414, "step": 38970 }, { "epoch": 0.314651727839978, "grad_norm": 0.7991055846214294, "learning_rate": 1.3715984265324751e-05, "loss": 3.209619140625, "step": 38980 }, { "epoch": 0.3147324491657451, "grad_norm": 1.4858156442642212, "learning_rate": 1.3714368795586537e-05, "loss": 2.8713932037353516, "step": 38990 }, { "epoch": 0.31481317049151214, "grad_norm": 0.7996634244918823, "learning_rate": 1.3712753325848325e-05, "loss": 3.231439208984375, "step": 39000 }, { "epoch": 0.3148938918172792, "grad_norm": 0.9808523654937744, "learning_rate": 1.3711137856110111e-05, "loss": 3.1276880264282227, "step": 39010 }, { "epoch": 0.31497461314304626, "grad_norm": 0.8361989855766296, "learning_rate": 1.3709522386371899e-05, "loss": 2.598810577392578, "step": 39020 }, { "epoch": 0.31505533446881334, "grad_norm": 1.0726608037948608, "learning_rate": 1.3707906916633685e-05, "loss": 3.236276626586914, "step": 39030 }, { "epoch": 0.31513605579458037, "grad_norm": 0.8931590914726257, "learning_rate": 1.3706291446895472e-05, "loss": 2.928221321105957, "step": 39040 }, { "epoch": 0.3152167771203474, "grad_norm": 1.1161351203918457, "learning_rate": 1.3704675977157258e-05, "loss": 2.77187442779541, "step": 39050 }, { "epoch": 0.3152974984461145, "grad_norm": 1.058940052986145, "learning_rate": 1.3703060507419046e-05, "loss": 2.5955526351928713, "step": 39060 }, { "epoch": 0.3153782197718815, "grad_norm": 0.7090407609939575, "learning_rate": 1.3701445037680832e-05, "loss": 2.9855329513549806, "step": 39070 }, { "epoch": 0.3154589410976486, "grad_norm": 1.227149248123169, "learning_rate": 1.369982956794262e-05, "loss": 3.553815460205078, "step": 39080 }, { "epoch": 0.31553966242341563, "grad_norm": 0.9125996232032776, "learning_rate": 1.3698214098204406e-05, "loss": 2.6983974456787108, "step": 39090 }, { "epoch": 0.3156203837491827, "grad_norm": 1.3814113140106201, "learning_rate": 1.3696598628466193e-05, "loss": 2.8799612045288088, "step": 39100 }, { "epoch": 0.31570110507494975, "grad_norm": 0.7066733837127686, "learning_rate": 1.3694983158727981e-05, "loss": 3.56473388671875, "step": 39110 }, { "epoch": 0.3157818264007168, "grad_norm": 1.0235555171966553, "learning_rate": 1.3693367688989767e-05, "loss": 2.6763128280639648, "step": 39120 }, { "epoch": 0.31586254772648387, "grad_norm": 1.080640435218811, "learning_rate": 1.3691752219251555e-05, "loss": 3.0585750579833983, "step": 39130 }, { "epoch": 0.3159432690522509, "grad_norm": 1.2848765850067139, "learning_rate": 1.369013674951334e-05, "loss": 3.0067182540893556, "step": 39140 }, { "epoch": 0.316023990378018, "grad_norm": 0.5584760904312134, "learning_rate": 1.3688521279775128e-05, "loss": 2.8600088119506837, "step": 39150 }, { "epoch": 0.316104711703785, "grad_norm": 0.5601982474327087, "learning_rate": 1.3686905810036914e-05, "loss": 2.9836597442626953, "step": 39160 }, { "epoch": 0.3161854330295521, "grad_norm": 1.2254928350448608, "learning_rate": 1.3685290340298702e-05, "loss": 2.9878753662109374, "step": 39170 }, { "epoch": 0.31626615435531913, "grad_norm": 0.886327862739563, "learning_rate": 1.3683674870560488e-05, "loss": 2.95810489654541, "step": 39180 }, { "epoch": 0.31634687568108616, "grad_norm": 0.7943648099899292, "learning_rate": 1.3682059400822276e-05, "loss": 3.442181396484375, "step": 39190 }, { "epoch": 0.31642759700685325, "grad_norm": 1.3431323766708374, "learning_rate": 1.3680443931084062e-05, "loss": 2.957870292663574, "step": 39200 }, { "epoch": 0.3165083183326203, "grad_norm": 1.004762053489685, "learning_rate": 1.367882846134585e-05, "loss": 2.9045793533325197, "step": 39210 }, { "epoch": 0.31658903965838736, "grad_norm": 0.9687034487724304, "learning_rate": 1.3677212991607636e-05, "loss": 2.6839746475219726, "step": 39220 }, { "epoch": 0.3166697609841544, "grad_norm": 0.8222223520278931, "learning_rate": 1.3675597521869423e-05, "loss": 3.0620752334594727, "step": 39230 }, { "epoch": 0.3167504823099215, "grad_norm": 0.7171282768249512, "learning_rate": 1.367398205213121e-05, "loss": 2.860835647583008, "step": 39240 }, { "epoch": 0.3168312036356885, "grad_norm": 1.3947681188583374, "learning_rate": 1.3672366582392997e-05, "loss": 3.1449493408203124, "step": 39250 }, { "epoch": 0.3169119249614556, "grad_norm": 1.1173988580703735, "learning_rate": 1.3670751112654783e-05, "loss": 3.0520952224731444, "step": 39260 }, { "epoch": 0.3169926462872226, "grad_norm": 0.5935187339782715, "learning_rate": 1.366913564291657e-05, "loss": 3.002649116516113, "step": 39270 }, { "epoch": 0.31707336761298965, "grad_norm": 2.229356288909912, "learning_rate": 1.3667520173178357e-05, "loss": 3.5246612548828127, "step": 39280 }, { "epoch": 0.31715408893875674, "grad_norm": 0.8599663376808167, "learning_rate": 1.3665904703440144e-05, "loss": 3.421725845336914, "step": 39290 }, { "epoch": 0.31723481026452377, "grad_norm": 1.2065173387527466, "learning_rate": 1.366428923370193e-05, "loss": 3.303508758544922, "step": 39300 }, { "epoch": 0.31731553159029086, "grad_norm": 0.811497151851654, "learning_rate": 1.3662673763963718e-05, "loss": 2.8078706741333006, "step": 39310 }, { "epoch": 0.3173962529160579, "grad_norm": 1.2478922605514526, "learning_rate": 1.3661058294225504e-05, "loss": 2.955815887451172, "step": 39320 }, { "epoch": 0.31747697424182497, "grad_norm": 0.788475751876831, "learning_rate": 1.3659442824487292e-05, "loss": 3.2212318420410155, "step": 39330 }, { "epoch": 0.317557695567592, "grad_norm": 0.7446308135986328, "learning_rate": 1.3657827354749078e-05, "loss": 2.9116634368896483, "step": 39340 }, { "epoch": 0.31763841689335903, "grad_norm": 0.799841582775116, "learning_rate": 1.3656211885010865e-05, "loss": 2.5731040954589846, "step": 39350 }, { "epoch": 0.3177191382191261, "grad_norm": 0.7532861828804016, "learning_rate": 1.3654596415272651e-05, "loss": 2.862204742431641, "step": 39360 }, { "epoch": 0.31779985954489315, "grad_norm": 0.7291314601898193, "learning_rate": 1.3652980945534439e-05, "loss": 3.258744812011719, "step": 39370 }, { "epoch": 0.31788058087066023, "grad_norm": 0.5948667526245117, "learning_rate": 1.3651365475796225e-05, "loss": 2.8800098419189455, "step": 39380 }, { "epoch": 0.31796130219642726, "grad_norm": 1.2029080390930176, "learning_rate": 1.3649750006058013e-05, "loss": 3.643244171142578, "step": 39390 }, { "epoch": 0.31804202352219435, "grad_norm": 0.8126194477081299, "learning_rate": 1.3648134536319799e-05, "loss": 3.117873954772949, "step": 39400 }, { "epoch": 0.3181227448479614, "grad_norm": 0.6947267055511475, "learning_rate": 1.3646519066581586e-05, "loss": 2.977647590637207, "step": 39410 }, { "epoch": 0.3182034661737284, "grad_norm": 0.8947028517723083, "learning_rate": 1.3644903596843372e-05, "loss": 2.8461177825927733, "step": 39420 }, { "epoch": 0.3182841874994955, "grad_norm": 1.1615047454833984, "learning_rate": 1.364328812710516e-05, "loss": 2.4461648941040037, "step": 39430 }, { "epoch": 0.3183649088252625, "grad_norm": 1.050642490386963, "learning_rate": 1.3641672657366946e-05, "loss": 2.856110382080078, "step": 39440 }, { "epoch": 0.3184456301510296, "grad_norm": 0.8364672660827637, "learning_rate": 1.3640057187628735e-05, "loss": 2.9263385772705077, "step": 39450 }, { "epoch": 0.31852635147679664, "grad_norm": 0.9007551670074463, "learning_rate": 1.363844171789052e-05, "loss": 2.5933500289916993, "step": 39460 }, { "epoch": 0.31860707280256373, "grad_norm": 0.8550246357917786, "learning_rate": 1.363682624815231e-05, "loss": 2.7543779373168946, "step": 39470 }, { "epoch": 0.31868779412833076, "grad_norm": 0.8681768774986267, "learning_rate": 1.3635210778414093e-05, "loss": 2.903084373474121, "step": 39480 }, { "epoch": 0.31876851545409784, "grad_norm": 0.9940814971923828, "learning_rate": 1.3633595308675883e-05, "loss": 3.1039072036743165, "step": 39490 }, { "epoch": 0.3188492367798649, "grad_norm": 1.2441673278808594, "learning_rate": 1.3631979838937667e-05, "loss": 2.7897205352783203, "step": 39500 }, { "epoch": 0.3189299581056319, "grad_norm": 0.8370952606201172, "learning_rate": 1.3630364369199457e-05, "loss": 2.9634092330932615, "step": 39510 }, { "epoch": 0.319010679431399, "grad_norm": 0.8812272548675537, "learning_rate": 1.3628748899461241e-05, "loss": 3.395760345458984, "step": 39520 }, { "epoch": 0.319091400757166, "grad_norm": 1.316575527191162, "learning_rate": 1.362713342972303e-05, "loss": 3.128119468688965, "step": 39530 }, { "epoch": 0.3191721220829331, "grad_norm": 0.7407028675079346, "learning_rate": 1.3625517959984815e-05, "loss": 3.1827720642089843, "step": 39540 }, { "epoch": 0.31925284340870014, "grad_norm": 1.601192593574524, "learning_rate": 1.3623902490246604e-05, "loss": 3.631710433959961, "step": 39550 }, { "epoch": 0.3193335647344672, "grad_norm": 1.1985139846801758, "learning_rate": 1.3622287020508388e-05, "loss": 3.5429126739501955, "step": 39560 }, { "epoch": 0.31941428606023425, "grad_norm": 0.92843097448349, "learning_rate": 1.3620671550770178e-05, "loss": 3.335213470458984, "step": 39570 }, { "epoch": 0.3194950073860013, "grad_norm": 0.5935670137405396, "learning_rate": 1.3619056081031962e-05, "loss": 2.896958923339844, "step": 39580 }, { "epoch": 0.31957572871176837, "grad_norm": 1.1110669374465942, "learning_rate": 1.3617440611293751e-05, "loss": 2.971991539001465, "step": 39590 }, { "epoch": 0.3196564500375354, "grad_norm": 0.9020702838897705, "learning_rate": 1.3615825141555536e-05, "loss": 2.652667999267578, "step": 39600 }, { "epoch": 0.3197371713633025, "grad_norm": 1.1109801530838013, "learning_rate": 1.3614209671817325e-05, "loss": 3.0925930023193358, "step": 39610 }, { "epoch": 0.3198178926890695, "grad_norm": 0.8975703716278076, "learning_rate": 1.3612594202079111e-05, "loss": 3.113003730773926, "step": 39620 }, { "epoch": 0.3198986140148366, "grad_norm": 1.6914559602737427, "learning_rate": 1.3610978732340899e-05, "loss": 2.7421072006225584, "step": 39630 }, { "epoch": 0.31997933534060363, "grad_norm": 0.6881662607192993, "learning_rate": 1.3609363262602685e-05, "loss": 2.5797245025634767, "step": 39640 }, { "epoch": 0.32006005666637066, "grad_norm": 0.977184534072876, "learning_rate": 1.3607747792864472e-05, "loss": 2.931719207763672, "step": 39650 }, { "epoch": 0.32014077799213775, "grad_norm": 0.6762261390686035, "learning_rate": 1.3606132323126258e-05, "loss": 3.3787765502929688, "step": 39660 }, { "epoch": 0.3202214993179048, "grad_norm": 0.8639407753944397, "learning_rate": 1.3604516853388046e-05, "loss": 2.87429256439209, "step": 39670 }, { "epoch": 0.32030222064367186, "grad_norm": 0.7721222639083862, "learning_rate": 1.3602901383649832e-05, "loss": 3.3398773193359377, "step": 39680 }, { "epoch": 0.3203829419694389, "grad_norm": 1.3749443292617798, "learning_rate": 1.360128591391162e-05, "loss": 3.2259212493896485, "step": 39690 }, { "epoch": 0.320463663295206, "grad_norm": 0.6994277238845825, "learning_rate": 1.3599670444173406e-05, "loss": 3.4229320526123046, "step": 39700 }, { "epoch": 0.320544384620973, "grad_norm": 1.138308048248291, "learning_rate": 1.3598054974435193e-05, "loss": 3.2059722900390626, "step": 39710 }, { "epoch": 0.3206251059467401, "grad_norm": 0.9084972143173218, "learning_rate": 1.359643950469698e-05, "loss": 3.504231643676758, "step": 39720 }, { "epoch": 0.3207058272725071, "grad_norm": 0.8389878869056702, "learning_rate": 1.3594824034958767e-05, "loss": 2.9879934310913088, "step": 39730 }, { "epoch": 0.32078654859827416, "grad_norm": 0.9177038073539734, "learning_rate": 1.3593208565220553e-05, "loss": 3.1824832916259767, "step": 39740 }, { "epoch": 0.32086726992404124, "grad_norm": 0.5602289438247681, "learning_rate": 1.3591593095482341e-05, "loss": 2.7483152389526366, "step": 39750 }, { "epoch": 0.3209479912498083, "grad_norm": 0.963169276714325, "learning_rate": 1.3589977625744127e-05, "loss": 3.1126693725585937, "step": 39760 }, { "epoch": 0.32102871257557536, "grad_norm": 0.8146944642066956, "learning_rate": 1.3588362156005915e-05, "loss": 3.2906490325927735, "step": 39770 }, { "epoch": 0.3211094339013424, "grad_norm": 1.066206932067871, "learning_rate": 1.35867466862677e-05, "loss": 2.8355037689208986, "step": 39780 }, { "epoch": 0.3211901552271095, "grad_norm": 0.7814667820930481, "learning_rate": 1.3585131216529488e-05, "loss": 3.6922481536865233, "step": 39790 }, { "epoch": 0.3212708765528765, "grad_norm": 1.1850212812423706, "learning_rate": 1.3583515746791274e-05, "loss": 2.6268953323364257, "step": 39800 }, { "epoch": 0.32135159787864354, "grad_norm": 1.2028151750564575, "learning_rate": 1.3581900277053062e-05, "loss": 3.0600379943847655, "step": 39810 }, { "epoch": 0.3214323192044106, "grad_norm": 1.4014071226119995, "learning_rate": 1.3580284807314848e-05, "loss": 2.9199804306030273, "step": 39820 }, { "epoch": 0.32151304053017765, "grad_norm": 0.7888873219490051, "learning_rate": 1.3578669337576636e-05, "loss": 2.7573734283447267, "step": 39830 }, { "epoch": 0.32159376185594474, "grad_norm": 0.9419317245483398, "learning_rate": 1.3577053867838422e-05, "loss": 2.96490421295166, "step": 39840 }, { "epoch": 0.32167448318171177, "grad_norm": 0.8414673209190369, "learning_rate": 1.357543839810021e-05, "loss": 2.994444465637207, "step": 39850 }, { "epoch": 0.32175520450747885, "grad_norm": 0.6141294240951538, "learning_rate": 1.3573822928361995e-05, "loss": 3.3189937591552736, "step": 39860 }, { "epoch": 0.3218359258332459, "grad_norm": 0.635941207408905, "learning_rate": 1.3572207458623783e-05, "loss": 2.959299850463867, "step": 39870 }, { "epoch": 0.3219166471590129, "grad_norm": 1.2032526731491089, "learning_rate": 1.3570591988885569e-05, "loss": 3.076715850830078, "step": 39880 }, { "epoch": 0.32199736848478, "grad_norm": 0.9822118282318115, "learning_rate": 1.3568976519147357e-05, "loss": 2.673203468322754, "step": 39890 }, { "epoch": 0.32207808981054703, "grad_norm": 1.6585242748260498, "learning_rate": 1.3567361049409143e-05, "loss": 2.7520198822021484, "step": 39900 }, { "epoch": 0.3221588111363141, "grad_norm": 0.8043695688247681, "learning_rate": 1.356574557967093e-05, "loss": 3.0479219436645506, "step": 39910 }, { "epoch": 0.32223953246208115, "grad_norm": 1.0145397186279297, "learning_rate": 1.3564130109932716e-05, "loss": 3.087425422668457, "step": 39920 }, { "epoch": 0.32232025378784823, "grad_norm": 1.260232925415039, "learning_rate": 1.3562514640194504e-05, "loss": 3.295360565185547, "step": 39930 }, { "epoch": 0.32240097511361526, "grad_norm": 0.8096426725387573, "learning_rate": 1.356089917045629e-05, "loss": 2.6329120635986327, "step": 39940 }, { "epoch": 0.3224816964393823, "grad_norm": 0.9587282538414001, "learning_rate": 1.3559283700718078e-05, "loss": 2.8214242935180662, "step": 39950 }, { "epoch": 0.3225624177651494, "grad_norm": 1.202423334121704, "learning_rate": 1.3557668230979864e-05, "loss": 3.4106189727783205, "step": 39960 }, { "epoch": 0.3226431390909164, "grad_norm": 1.4221038818359375, "learning_rate": 1.3556052761241651e-05, "loss": 2.9357057571411134, "step": 39970 }, { "epoch": 0.3227238604166835, "grad_norm": 0.9111809134483337, "learning_rate": 1.3554437291503437e-05, "loss": 2.709053611755371, "step": 39980 }, { "epoch": 0.3228045817424505, "grad_norm": 0.9917678833007812, "learning_rate": 1.3552821821765225e-05, "loss": 2.9309061050415037, "step": 39990 }, { "epoch": 0.3228853030682176, "grad_norm": 1.3044250011444092, "learning_rate": 1.3551206352027011e-05, "loss": 2.7662260055541994, "step": 40000 }, { "epoch": 0.32296602439398464, "grad_norm": 0.5714921355247498, "learning_rate": 1.3549590882288799e-05, "loss": 3.2247318267822265, "step": 40010 }, { "epoch": 0.3230467457197517, "grad_norm": 1.332377314567566, "learning_rate": 1.3547975412550585e-05, "loss": 3.165892791748047, "step": 40020 }, { "epoch": 0.32312746704551876, "grad_norm": 0.9927858114242554, "learning_rate": 1.3546359942812373e-05, "loss": 2.8972919464111326, "step": 40030 }, { "epoch": 0.3232081883712858, "grad_norm": 1.1567180156707764, "learning_rate": 1.3544744473074159e-05, "loss": 3.0331098556518556, "step": 40040 }, { "epoch": 0.3232889096970529, "grad_norm": 1.303855299949646, "learning_rate": 1.3543129003335946e-05, "loss": 2.655655097961426, "step": 40050 }, { "epoch": 0.3233696310228199, "grad_norm": 1.2815918922424316, "learning_rate": 1.3541513533597732e-05, "loss": 3.6316455841064452, "step": 40060 }, { "epoch": 0.323450352348587, "grad_norm": 0.6107091307640076, "learning_rate": 1.353989806385952e-05, "loss": 2.795887756347656, "step": 40070 }, { "epoch": 0.323531073674354, "grad_norm": 1.0319757461547852, "learning_rate": 1.3538282594121306e-05, "loss": 2.876111602783203, "step": 40080 }, { "epoch": 0.3236117950001211, "grad_norm": 1.022998332977295, "learning_rate": 1.3536667124383094e-05, "loss": 2.6688013076782227, "step": 40090 }, { "epoch": 0.32369251632588814, "grad_norm": 1.0892218351364136, "learning_rate": 1.353505165464488e-05, "loss": 2.8555534362792967, "step": 40100 }, { "epoch": 0.32377323765165517, "grad_norm": 1.0865323543548584, "learning_rate": 1.3533436184906667e-05, "loss": 3.4885189056396486, "step": 40110 }, { "epoch": 0.32385395897742225, "grad_norm": 0.975284993648529, "learning_rate": 1.3531820715168453e-05, "loss": 2.8417139053344727, "step": 40120 }, { "epoch": 0.3239346803031893, "grad_norm": 1.564969539642334, "learning_rate": 1.3530205245430241e-05, "loss": 3.205611801147461, "step": 40130 }, { "epoch": 0.32401540162895637, "grad_norm": 0.899908185005188, "learning_rate": 1.3528589775692027e-05, "loss": 3.427934265136719, "step": 40140 }, { "epoch": 0.3240961229547234, "grad_norm": 1.1541070938110352, "learning_rate": 1.3526974305953815e-05, "loss": 3.201161575317383, "step": 40150 }, { "epoch": 0.3241768442804905, "grad_norm": 0.7559607625007629, "learning_rate": 1.35253588362156e-05, "loss": 2.940290832519531, "step": 40160 }, { "epoch": 0.3242575656062575, "grad_norm": 1.7737116813659668, "learning_rate": 1.3523743366477388e-05, "loss": 3.522577667236328, "step": 40170 }, { "epoch": 0.32433828693202454, "grad_norm": 2.896682024002075, "learning_rate": 1.3522127896739174e-05, "loss": 3.3110725402832033, "step": 40180 }, { "epoch": 0.32441900825779163, "grad_norm": 1.1332319974899292, "learning_rate": 1.3520512427000962e-05, "loss": 3.2786773681640624, "step": 40190 }, { "epoch": 0.32449972958355866, "grad_norm": 1.2862434387207031, "learning_rate": 1.3518896957262748e-05, "loss": 2.589582824707031, "step": 40200 }, { "epoch": 0.32458045090932575, "grad_norm": 1.2328321933746338, "learning_rate": 1.3517281487524536e-05, "loss": 3.6933029174804686, "step": 40210 }, { "epoch": 0.3246611722350928, "grad_norm": 1.2631574869155884, "learning_rate": 1.3515666017786322e-05, "loss": 2.4234195709228517, "step": 40220 }, { "epoch": 0.32474189356085986, "grad_norm": 1.0498305559158325, "learning_rate": 1.351405054804811e-05, "loss": 2.932485580444336, "step": 40230 }, { "epoch": 0.3248226148866269, "grad_norm": 0.7212077975273132, "learning_rate": 1.3512435078309895e-05, "loss": 2.919719696044922, "step": 40240 }, { "epoch": 0.324903336212394, "grad_norm": 0.9412869811058044, "learning_rate": 1.3510819608571683e-05, "loss": 2.9810455322265623, "step": 40250 }, { "epoch": 0.324984057538161, "grad_norm": 1.0274289846420288, "learning_rate": 1.3509204138833469e-05, "loss": 3.0032392501831056, "step": 40260 }, { "epoch": 0.32506477886392804, "grad_norm": 3.6702322959899902, "learning_rate": 1.3507588669095257e-05, "loss": 2.8785379409790037, "step": 40270 }, { "epoch": 0.3251455001896951, "grad_norm": 1.0594556331634521, "learning_rate": 1.3505973199357043e-05, "loss": 3.547829437255859, "step": 40280 }, { "epoch": 0.32522622151546215, "grad_norm": 0.9619686007499695, "learning_rate": 1.350435772961883e-05, "loss": 3.157623291015625, "step": 40290 }, { "epoch": 0.32530694284122924, "grad_norm": 0.9283003807067871, "learning_rate": 1.3502742259880617e-05, "loss": 3.4951873779296876, "step": 40300 }, { "epoch": 0.32538766416699627, "grad_norm": 1.1483557224273682, "learning_rate": 1.3501126790142404e-05, "loss": 2.4941455841064455, "step": 40310 }, { "epoch": 0.32546838549276336, "grad_norm": 0.9074137806892395, "learning_rate": 1.349951132040419e-05, "loss": 2.707003593444824, "step": 40320 }, { "epoch": 0.3255491068185304, "grad_norm": 0.980902910232544, "learning_rate": 1.3497895850665978e-05, "loss": 2.5844699859619142, "step": 40330 }, { "epoch": 0.3256298281442974, "grad_norm": 1.1845495700836182, "learning_rate": 1.3496280380927764e-05, "loss": 2.967806816101074, "step": 40340 }, { "epoch": 0.3257105494700645, "grad_norm": 0.9426555633544922, "learning_rate": 1.3494664911189552e-05, "loss": 3.0857721328735352, "step": 40350 }, { "epoch": 0.32579127079583153, "grad_norm": 0.9346073269844055, "learning_rate": 1.3493049441451341e-05, "loss": 3.043362045288086, "step": 40360 }, { "epoch": 0.3258719921215986, "grad_norm": 0.6644934415817261, "learning_rate": 1.3491433971713125e-05, "loss": 2.7899770736694336, "step": 40370 }, { "epoch": 0.32595271344736565, "grad_norm": 0.9310585856437683, "learning_rate": 1.3489818501974915e-05, "loss": 2.9715398788452148, "step": 40380 }, { "epoch": 0.32603343477313274, "grad_norm": 0.7584919333457947, "learning_rate": 1.3488203032236699e-05, "loss": 2.6826271057128905, "step": 40390 }, { "epoch": 0.32611415609889977, "grad_norm": 0.8203462362289429, "learning_rate": 1.3486587562498488e-05, "loss": 3.4154895782470702, "step": 40400 }, { "epoch": 0.3261948774246668, "grad_norm": 1.3202742338180542, "learning_rate": 1.3484972092760273e-05, "loss": 3.077031135559082, "step": 40410 }, { "epoch": 0.3262755987504339, "grad_norm": 0.7796246409416199, "learning_rate": 1.3483356623022062e-05, "loss": 2.8523441314697267, "step": 40420 }, { "epoch": 0.3263563200762009, "grad_norm": 0.9990280866622925, "learning_rate": 1.3481741153283846e-05, "loss": 2.7667057037353517, "step": 40430 }, { "epoch": 0.326437041401968, "grad_norm": 0.7432882785797119, "learning_rate": 1.3480125683545636e-05, "loss": 2.7676204681396483, "step": 40440 }, { "epoch": 0.32651776272773503, "grad_norm": 1.3132057189941406, "learning_rate": 1.347851021380742e-05, "loss": 2.9594499588012697, "step": 40450 }, { "epoch": 0.3265984840535021, "grad_norm": 1.6097028255462646, "learning_rate": 1.347689474406921e-05, "loss": 2.6844079971313475, "step": 40460 }, { "epoch": 0.32667920537926914, "grad_norm": 0.7595488429069519, "learning_rate": 1.3475279274330994e-05, "loss": 3.2957557678222655, "step": 40470 }, { "epoch": 0.32675992670503623, "grad_norm": 0.5991522669792175, "learning_rate": 1.3473663804592783e-05, "loss": 2.9296125411987304, "step": 40480 }, { "epoch": 0.32684064803080326, "grad_norm": 0.8724828958511353, "learning_rate": 1.3472048334854569e-05, "loss": 2.684419059753418, "step": 40490 }, { "epoch": 0.3269213693565703, "grad_norm": 0.5722756385803223, "learning_rate": 1.3470432865116357e-05, "loss": 2.501430130004883, "step": 40500 }, { "epoch": 0.3270020906823374, "grad_norm": 0.9420396089553833, "learning_rate": 1.3468817395378143e-05, "loss": 3.2415756225585937, "step": 40510 }, { "epoch": 0.3270828120081044, "grad_norm": 1.621282696723938, "learning_rate": 1.346720192563993e-05, "loss": 3.0403127670288086, "step": 40520 }, { "epoch": 0.3271635333338715, "grad_norm": 1.3820074796676636, "learning_rate": 1.3465586455901717e-05, "loss": 2.8395156860351562, "step": 40530 }, { "epoch": 0.3272442546596385, "grad_norm": 1.3821146488189697, "learning_rate": 1.3463970986163504e-05, "loss": 2.8680492401123048, "step": 40540 }, { "epoch": 0.3273249759854056, "grad_norm": 0.6399001479148865, "learning_rate": 1.346235551642529e-05, "loss": 3.039657974243164, "step": 40550 }, { "epoch": 0.32740569731117264, "grad_norm": 0.6759233474731445, "learning_rate": 1.3460740046687078e-05, "loss": 3.2584815979003907, "step": 40560 }, { "epoch": 0.32748641863693967, "grad_norm": 1.6921619176864624, "learning_rate": 1.3459124576948864e-05, "loss": 2.7218753814697267, "step": 40570 }, { "epoch": 0.32756713996270675, "grad_norm": 1.5912991762161255, "learning_rate": 1.3457509107210652e-05, "loss": 2.96212100982666, "step": 40580 }, { "epoch": 0.3276478612884738, "grad_norm": 0.9868085384368896, "learning_rate": 1.3455893637472438e-05, "loss": 3.099913787841797, "step": 40590 }, { "epoch": 0.32772858261424087, "grad_norm": 1.1434992551803589, "learning_rate": 1.3454278167734225e-05, "loss": 2.816599464416504, "step": 40600 }, { "epoch": 0.3278093039400079, "grad_norm": 0.8328906893730164, "learning_rate": 1.3452662697996011e-05, "loss": 3.036044120788574, "step": 40610 }, { "epoch": 0.327890025265775, "grad_norm": 0.5945713520050049, "learning_rate": 1.3451047228257799e-05, "loss": 2.6925836563110352, "step": 40620 }, { "epoch": 0.327970746591542, "grad_norm": 0.7442622780799866, "learning_rate": 1.3449431758519585e-05, "loss": 2.7552371978759767, "step": 40630 }, { "epoch": 0.32805146791730905, "grad_norm": 1.2735973596572876, "learning_rate": 1.3447816288781373e-05, "loss": 3.242543411254883, "step": 40640 }, { "epoch": 0.32813218924307613, "grad_norm": 1.383596420288086, "learning_rate": 1.3446200819043159e-05, "loss": 3.0547542572021484, "step": 40650 }, { "epoch": 0.32821291056884316, "grad_norm": 1.1577118635177612, "learning_rate": 1.3444585349304946e-05, "loss": 3.359549331665039, "step": 40660 }, { "epoch": 0.32829363189461025, "grad_norm": 0.8822405338287354, "learning_rate": 1.3442969879566732e-05, "loss": 3.5205291748046874, "step": 40670 }, { "epoch": 0.3283743532203773, "grad_norm": 1.0476205348968506, "learning_rate": 1.344135440982852e-05, "loss": 3.4603179931640624, "step": 40680 }, { "epoch": 0.32845507454614437, "grad_norm": 1.2255146503448486, "learning_rate": 1.3439738940090306e-05, "loss": 2.866155433654785, "step": 40690 }, { "epoch": 0.3285357958719114, "grad_norm": 0.7260087132453918, "learning_rate": 1.3438123470352094e-05, "loss": 2.966269874572754, "step": 40700 }, { "epoch": 0.3286165171976785, "grad_norm": 0.7065218687057495, "learning_rate": 1.343650800061388e-05, "loss": 2.7122339248657226, "step": 40710 }, { "epoch": 0.3286972385234455, "grad_norm": 1.4644461870193481, "learning_rate": 1.3434892530875667e-05, "loss": 3.4342281341552736, "step": 40720 }, { "epoch": 0.32877795984921254, "grad_norm": 1.2059202194213867, "learning_rate": 1.3433277061137453e-05, "loss": 3.020282745361328, "step": 40730 }, { "epoch": 0.32885868117497963, "grad_norm": 1.1045111417770386, "learning_rate": 1.3431661591399241e-05, "loss": 2.877900505065918, "step": 40740 }, { "epoch": 0.32893940250074666, "grad_norm": 1.0852473974227905, "learning_rate": 1.3430046121661027e-05, "loss": 2.8654394149780273, "step": 40750 }, { "epoch": 0.32902012382651374, "grad_norm": 0.900675356388092, "learning_rate": 1.3428430651922815e-05, "loss": 2.710366058349609, "step": 40760 }, { "epoch": 0.3291008451522808, "grad_norm": 0.9750630855560303, "learning_rate": 1.34268151821846e-05, "loss": 2.9871536254882813, "step": 40770 }, { "epoch": 0.32918156647804786, "grad_norm": 1.1931062936782837, "learning_rate": 1.3425199712446389e-05, "loss": 2.9231182098388673, "step": 40780 }, { "epoch": 0.3292622878038149, "grad_norm": 0.9826894402503967, "learning_rate": 1.3423584242708175e-05, "loss": 2.9984415054321287, "step": 40790 }, { "epoch": 0.3293430091295819, "grad_norm": 1.041500449180603, "learning_rate": 1.3421968772969962e-05, "loss": 3.091617393493652, "step": 40800 }, { "epoch": 0.329423730455349, "grad_norm": 0.49881860613822937, "learning_rate": 1.3420353303231748e-05, "loss": 2.698861312866211, "step": 40810 }, { "epoch": 0.32950445178111604, "grad_norm": 1.103922724723816, "learning_rate": 1.3418737833493536e-05, "loss": 2.981678771972656, "step": 40820 }, { "epoch": 0.3295851731068831, "grad_norm": 1.0783767700195312, "learning_rate": 1.3417122363755322e-05, "loss": 2.7342809677124023, "step": 40830 }, { "epoch": 0.32966589443265015, "grad_norm": 0.5655770897865295, "learning_rate": 1.341550689401711e-05, "loss": 3.2719024658203124, "step": 40840 }, { "epoch": 0.32974661575841724, "grad_norm": 1.5937198400497437, "learning_rate": 1.3413891424278896e-05, "loss": 3.655470275878906, "step": 40850 }, { "epoch": 0.32982733708418427, "grad_norm": 0.8878530859947205, "learning_rate": 1.3412275954540683e-05, "loss": 2.987780952453613, "step": 40860 }, { "epoch": 0.3299080584099513, "grad_norm": 1.027181625366211, "learning_rate": 1.341066048480247e-05, "loss": 2.9993564605712892, "step": 40870 }, { "epoch": 0.3299887797357184, "grad_norm": 1.471436619758606, "learning_rate": 1.3409045015064257e-05, "loss": 2.6507326126098634, "step": 40880 }, { "epoch": 0.3300695010614854, "grad_norm": 0.919568657875061, "learning_rate": 1.3407429545326043e-05, "loss": 3.312439727783203, "step": 40890 }, { "epoch": 0.3301502223872525, "grad_norm": 0.6567249894142151, "learning_rate": 1.340581407558783e-05, "loss": 3.169072151184082, "step": 40900 }, { "epoch": 0.33023094371301953, "grad_norm": 0.8772899508476257, "learning_rate": 1.3404198605849617e-05, "loss": 2.936755561828613, "step": 40910 }, { "epoch": 0.3303116650387866, "grad_norm": 0.7762523889541626, "learning_rate": 1.3402583136111404e-05, "loss": 3.0285778045654297, "step": 40920 }, { "epoch": 0.33039238636455365, "grad_norm": 0.9240770936012268, "learning_rate": 1.340096766637319e-05, "loss": 2.9921661376953126, "step": 40930 }, { "epoch": 0.33047310769032073, "grad_norm": 1.2484922409057617, "learning_rate": 1.3399352196634978e-05, "loss": 3.10012149810791, "step": 40940 }, { "epoch": 0.33055382901608776, "grad_norm": 0.9498218297958374, "learning_rate": 1.3397736726896764e-05, "loss": 3.223277282714844, "step": 40950 }, { "epoch": 0.3306345503418548, "grad_norm": 0.738555908203125, "learning_rate": 1.3396121257158552e-05, "loss": 3.551662826538086, "step": 40960 }, { "epoch": 0.3307152716676219, "grad_norm": 1.3233816623687744, "learning_rate": 1.3394505787420338e-05, "loss": 3.518075180053711, "step": 40970 }, { "epoch": 0.3307959929933889, "grad_norm": 0.8607803583145142, "learning_rate": 1.3392890317682125e-05, "loss": 3.0906801223754883, "step": 40980 }, { "epoch": 0.330876714319156, "grad_norm": 1.778277039527893, "learning_rate": 1.3391274847943911e-05, "loss": 3.0401079177856447, "step": 40990 }, { "epoch": 0.330957435644923, "grad_norm": 1.2024421691894531, "learning_rate": 1.3389659378205699e-05, "loss": 2.829544448852539, "step": 41000 }, { "epoch": 0.3310381569706901, "grad_norm": 0.8977139592170715, "learning_rate": 1.3388043908467485e-05, "loss": 3.173709678649902, "step": 41010 }, { "epoch": 0.33111887829645714, "grad_norm": 1.0602383613586426, "learning_rate": 1.3386428438729273e-05, "loss": 3.129743766784668, "step": 41020 }, { "epoch": 0.3311995996222242, "grad_norm": 0.8425291180610657, "learning_rate": 1.3384812968991059e-05, "loss": 3.4402870178222655, "step": 41030 }, { "epoch": 0.33128032094799126, "grad_norm": 1.0619874000549316, "learning_rate": 1.3383197499252846e-05, "loss": 3.0219852447509767, "step": 41040 }, { "epoch": 0.3313610422737583, "grad_norm": 0.7042098045349121, "learning_rate": 1.3381582029514632e-05, "loss": 2.6923160552978516, "step": 41050 }, { "epoch": 0.3314417635995254, "grad_norm": 0.9198212027549744, "learning_rate": 1.337996655977642e-05, "loss": 2.747940444946289, "step": 41060 }, { "epoch": 0.3315224849252924, "grad_norm": 0.9948573112487793, "learning_rate": 1.3378351090038206e-05, "loss": 2.887459564208984, "step": 41070 }, { "epoch": 0.3316032062510595, "grad_norm": 0.6623364090919495, "learning_rate": 1.3376735620299994e-05, "loss": 2.8268932342529296, "step": 41080 }, { "epoch": 0.3316839275768265, "grad_norm": 1.877231478691101, "learning_rate": 1.337512015056178e-05, "loss": 3.102176284790039, "step": 41090 }, { "epoch": 0.33176464890259355, "grad_norm": 1.5415157079696655, "learning_rate": 1.3373504680823568e-05, "loss": 3.697576904296875, "step": 41100 }, { "epoch": 0.33184537022836064, "grad_norm": 0.8567975759506226, "learning_rate": 1.3371889211085354e-05, "loss": 3.049610900878906, "step": 41110 }, { "epoch": 0.33192609155412767, "grad_norm": 0.9024821519851685, "learning_rate": 1.3370273741347141e-05, "loss": 3.172087287902832, "step": 41120 }, { "epoch": 0.33200681287989475, "grad_norm": 1.2021777629852295, "learning_rate": 1.3368658271608927e-05, "loss": 3.1570718765258787, "step": 41130 }, { "epoch": 0.3320875342056618, "grad_norm": 1.3268568515777588, "learning_rate": 1.3367042801870715e-05, "loss": 3.075827217102051, "step": 41140 }, { "epoch": 0.33216825553142887, "grad_norm": 0.7986438870429993, "learning_rate": 1.3365427332132501e-05, "loss": 2.9990345001220704, "step": 41150 }, { "epoch": 0.3322489768571959, "grad_norm": 0.6636921763420105, "learning_rate": 1.3363811862394289e-05, "loss": 3.3253250122070312, "step": 41160 }, { "epoch": 0.33232969818296293, "grad_norm": 1.2248716354370117, "learning_rate": 1.3362196392656075e-05, "loss": 2.995327949523926, "step": 41170 }, { "epoch": 0.33241041950873, "grad_norm": 0.8724003434181213, "learning_rate": 1.3360580922917862e-05, "loss": 3.0091968536376954, "step": 41180 }, { "epoch": 0.33249114083449705, "grad_norm": 1.1713165044784546, "learning_rate": 1.3358965453179648e-05, "loss": 3.4167930603027346, "step": 41190 }, { "epoch": 0.33257186216026413, "grad_norm": 1.158494234085083, "learning_rate": 1.3357349983441436e-05, "loss": 3.1971153259277343, "step": 41200 }, { "epoch": 0.33265258348603116, "grad_norm": 0.7269444465637207, "learning_rate": 1.3355734513703222e-05, "loss": 3.2979862213134767, "step": 41210 }, { "epoch": 0.33273330481179825, "grad_norm": 1.5322355031967163, "learning_rate": 1.335411904396501e-05, "loss": 3.261504364013672, "step": 41220 }, { "epoch": 0.3328140261375653, "grad_norm": 0.905684769153595, "learning_rate": 1.3352503574226796e-05, "loss": 3.169014739990234, "step": 41230 }, { "epoch": 0.33289474746333236, "grad_norm": 0.9709617495536804, "learning_rate": 1.3350888104488583e-05, "loss": 2.9138050079345703, "step": 41240 }, { "epoch": 0.3329754687890994, "grad_norm": 0.8796635866165161, "learning_rate": 1.334927263475037e-05, "loss": 3.2197032928466798, "step": 41250 }, { "epoch": 0.3330561901148664, "grad_norm": 0.616312563419342, "learning_rate": 1.3347657165012157e-05, "loss": 2.795314598083496, "step": 41260 }, { "epoch": 0.3331369114406335, "grad_norm": 0.7758647203445435, "learning_rate": 1.3346041695273943e-05, "loss": 2.907061004638672, "step": 41270 }, { "epoch": 0.33321763276640054, "grad_norm": 1.7043662071228027, "learning_rate": 1.334442622553573e-05, "loss": 3.271376037597656, "step": 41280 }, { "epoch": 0.3332983540921676, "grad_norm": 0.9522733092308044, "learning_rate": 1.3342810755797517e-05, "loss": 2.92471923828125, "step": 41290 }, { "epoch": 0.33337907541793466, "grad_norm": 0.6198520660400391, "learning_rate": 1.3341195286059304e-05, "loss": 3.0161230087280275, "step": 41300 }, { "epoch": 0.33345979674370174, "grad_norm": 1.2365436553955078, "learning_rate": 1.333957981632109e-05, "loss": 3.0801729202270507, "step": 41310 }, { "epoch": 0.33354051806946877, "grad_norm": 0.770654022693634, "learning_rate": 1.3337964346582878e-05, "loss": 2.731405830383301, "step": 41320 }, { "epoch": 0.3336212393952358, "grad_norm": 1.6648023128509521, "learning_rate": 1.3336348876844664e-05, "loss": 2.923999214172363, "step": 41330 }, { "epoch": 0.3337019607210029, "grad_norm": 0.80178302526474, "learning_rate": 1.3334733407106452e-05, "loss": 2.9353565216064452, "step": 41340 }, { "epoch": 0.3337826820467699, "grad_norm": 0.7239142060279846, "learning_rate": 1.3333117937368238e-05, "loss": 3.3666725158691406, "step": 41350 }, { "epoch": 0.333863403372537, "grad_norm": 1.1241353750228882, "learning_rate": 1.3331502467630027e-05, "loss": 3.2077686309814455, "step": 41360 }, { "epoch": 0.33394412469830403, "grad_norm": 1.5656840801239014, "learning_rate": 1.3329886997891812e-05, "loss": 2.9684110641479493, "step": 41370 }, { "epoch": 0.3340248460240711, "grad_norm": 0.7157993316650391, "learning_rate": 1.3328271528153601e-05, "loss": 3.2092567443847657, "step": 41380 }, { "epoch": 0.33410556734983815, "grad_norm": 0.9730052947998047, "learning_rate": 1.3326656058415385e-05, "loss": 3.043183708190918, "step": 41390 }, { "epoch": 0.3341862886756052, "grad_norm": 0.6015478372573853, "learning_rate": 1.3325040588677175e-05, "loss": 2.6541975021362303, "step": 41400 }, { "epoch": 0.33426701000137227, "grad_norm": 0.9064708948135376, "learning_rate": 1.3323425118938959e-05, "loss": 2.8003395080566404, "step": 41410 }, { "epoch": 0.3343477313271393, "grad_norm": 1.40025794506073, "learning_rate": 1.3321809649200748e-05, "loss": 3.4295215606689453, "step": 41420 }, { "epoch": 0.3344284526529064, "grad_norm": 1.1471482515335083, "learning_rate": 1.3320194179462533e-05, "loss": 3.114857292175293, "step": 41430 }, { "epoch": 0.3345091739786734, "grad_norm": 1.1715803146362305, "learning_rate": 1.3318578709724322e-05, "loss": 3.155990409851074, "step": 41440 }, { "epoch": 0.3345898953044405, "grad_norm": 1.2248820066452026, "learning_rate": 1.3316963239986106e-05, "loss": 3.383350372314453, "step": 41450 }, { "epoch": 0.33467061663020753, "grad_norm": 1.0010181665420532, "learning_rate": 1.3315347770247896e-05, "loss": 3.1796342849731447, "step": 41460 }, { "epoch": 0.3347513379559746, "grad_norm": 1.4402440786361694, "learning_rate": 1.331373230050968e-05, "loss": 3.0916648864746095, "step": 41470 }, { "epoch": 0.33483205928174165, "grad_norm": 0.7685287594795227, "learning_rate": 1.331211683077147e-05, "loss": 2.4874486923217773, "step": 41480 }, { "epoch": 0.3349127806075087, "grad_norm": 1.1039378643035889, "learning_rate": 1.3310501361033254e-05, "loss": 3.1154577255249025, "step": 41490 }, { "epoch": 0.33499350193327576, "grad_norm": 0.6589305996894836, "learning_rate": 1.3308885891295043e-05, "loss": 3.3534561157226563, "step": 41500 }, { "epoch": 0.3350742232590428, "grad_norm": 0.6215343475341797, "learning_rate": 1.3307270421556827e-05, "loss": 2.9784494400024415, "step": 41510 }, { "epoch": 0.3351549445848099, "grad_norm": 0.7082102298736572, "learning_rate": 1.3305654951818617e-05, "loss": 2.9838325500488283, "step": 41520 }, { "epoch": 0.3352356659105769, "grad_norm": 0.714810311794281, "learning_rate": 1.3304039482080403e-05, "loss": 2.375644493103027, "step": 41530 }, { "epoch": 0.335316387236344, "grad_norm": 0.9083342552185059, "learning_rate": 1.330242401234219e-05, "loss": 2.6241996765136717, "step": 41540 }, { "epoch": 0.335397108562111, "grad_norm": 0.6574148535728455, "learning_rate": 1.3300808542603976e-05, "loss": 2.8536312103271486, "step": 41550 }, { "epoch": 0.33547782988787805, "grad_norm": 1.0776348114013672, "learning_rate": 1.3299193072865764e-05, "loss": 2.9054195404052736, "step": 41560 }, { "epoch": 0.33555855121364514, "grad_norm": 1.0941998958587646, "learning_rate": 1.329757760312755e-05, "loss": 2.9404651641845705, "step": 41570 }, { "epoch": 0.33563927253941217, "grad_norm": 0.6516469120979309, "learning_rate": 1.3295962133389338e-05, "loss": 3.2136844635009765, "step": 41580 }, { "epoch": 0.33571999386517926, "grad_norm": 1.3877488374710083, "learning_rate": 1.3294346663651124e-05, "loss": 2.816351890563965, "step": 41590 }, { "epoch": 0.3358007151909463, "grad_norm": 0.675987184047699, "learning_rate": 1.3292731193912912e-05, "loss": 3.191310501098633, "step": 41600 }, { "epoch": 0.33588143651671337, "grad_norm": 1.00364351272583, "learning_rate": 1.32911157241747e-05, "loss": 3.4848258972167967, "step": 41610 }, { "epoch": 0.3359621578424804, "grad_norm": 0.959018349647522, "learning_rate": 1.3289500254436485e-05, "loss": 3.0316673278808595, "step": 41620 }, { "epoch": 0.33604287916824743, "grad_norm": 0.9237996339797974, "learning_rate": 1.3287884784698273e-05, "loss": 2.9310985565185548, "step": 41630 }, { "epoch": 0.3361236004940145, "grad_norm": 1.5159242153167725, "learning_rate": 1.3286269314960059e-05, "loss": 3.341766357421875, "step": 41640 }, { "epoch": 0.33620432181978155, "grad_norm": 1.0120569467544556, "learning_rate": 1.3284653845221847e-05, "loss": 2.72523193359375, "step": 41650 }, { "epoch": 0.33628504314554863, "grad_norm": 0.6735564470291138, "learning_rate": 1.3283038375483633e-05, "loss": 2.942104148864746, "step": 41660 }, { "epoch": 0.33636576447131566, "grad_norm": 0.6764937043190002, "learning_rate": 1.328142290574542e-05, "loss": 2.6240610122680663, "step": 41670 }, { "epoch": 0.33644648579708275, "grad_norm": 0.9748018980026245, "learning_rate": 1.3279807436007206e-05, "loss": 2.6354747772216798, "step": 41680 }, { "epoch": 0.3365272071228498, "grad_norm": 0.8206234574317932, "learning_rate": 1.3278191966268994e-05, "loss": 3.007522773742676, "step": 41690 }, { "epoch": 0.33660792844861687, "grad_norm": 0.8536885976791382, "learning_rate": 1.327657649653078e-05, "loss": 2.9866607666015623, "step": 41700 }, { "epoch": 0.3366886497743839, "grad_norm": 1.135722041130066, "learning_rate": 1.3274961026792568e-05, "loss": 2.772366523742676, "step": 41710 }, { "epoch": 0.3367693711001509, "grad_norm": 1.2856987714767456, "learning_rate": 1.3273345557054354e-05, "loss": 2.89212589263916, "step": 41720 }, { "epoch": 0.336850092425918, "grad_norm": 1.2642457485198975, "learning_rate": 1.3271730087316141e-05, "loss": 2.9451362609863283, "step": 41730 }, { "epoch": 0.33693081375168504, "grad_norm": 0.7845278978347778, "learning_rate": 1.3270114617577927e-05, "loss": 2.977451515197754, "step": 41740 }, { "epoch": 0.33701153507745213, "grad_norm": 1.0560336112976074, "learning_rate": 1.3268499147839715e-05, "loss": 2.972075843811035, "step": 41750 }, { "epoch": 0.33709225640321916, "grad_norm": 0.9155007600784302, "learning_rate": 1.3266883678101501e-05, "loss": 2.759989929199219, "step": 41760 }, { "epoch": 0.33717297772898625, "grad_norm": 0.5278995633125305, "learning_rate": 1.3265268208363289e-05, "loss": 3.573736572265625, "step": 41770 }, { "epoch": 0.3372536990547533, "grad_norm": 1.7050981521606445, "learning_rate": 1.3263652738625075e-05, "loss": 3.323827362060547, "step": 41780 }, { "epoch": 0.3373344203805203, "grad_norm": 1.0870940685272217, "learning_rate": 1.3262037268886862e-05, "loss": 2.7554092407226562, "step": 41790 }, { "epoch": 0.3374151417062874, "grad_norm": 1.6836771965026855, "learning_rate": 1.3260421799148648e-05, "loss": 3.162049102783203, "step": 41800 }, { "epoch": 0.3374958630320544, "grad_norm": 0.570310652256012, "learning_rate": 1.3258806329410436e-05, "loss": 3.566263198852539, "step": 41810 }, { "epoch": 0.3375765843578215, "grad_norm": 1.9072281122207642, "learning_rate": 1.3257190859672222e-05, "loss": 2.9888687133789062, "step": 41820 }, { "epoch": 0.33765730568358854, "grad_norm": 0.830615222454071, "learning_rate": 1.325557538993401e-05, "loss": 2.927029037475586, "step": 41830 }, { "epoch": 0.3377380270093556, "grad_norm": 0.7823902368545532, "learning_rate": 1.3253959920195796e-05, "loss": 2.706346130371094, "step": 41840 }, { "epoch": 0.33781874833512265, "grad_norm": 1.3845223188400269, "learning_rate": 1.3252344450457584e-05, "loss": 2.999457359313965, "step": 41850 }, { "epoch": 0.3378994696608897, "grad_norm": 1.4625157117843628, "learning_rate": 1.325072898071937e-05, "loss": 3.047140693664551, "step": 41860 }, { "epoch": 0.33798019098665677, "grad_norm": 1.2635774612426758, "learning_rate": 1.3249113510981157e-05, "loss": 3.0680768966674803, "step": 41870 }, { "epoch": 0.3380609123124238, "grad_norm": 1.0673191547393799, "learning_rate": 1.3247498041242943e-05, "loss": 2.824222755432129, "step": 41880 }, { "epoch": 0.3381416336381909, "grad_norm": 0.7327674031257629, "learning_rate": 1.3245882571504731e-05, "loss": 2.4015989303588867, "step": 41890 }, { "epoch": 0.3382223549639579, "grad_norm": 1.0001832246780396, "learning_rate": 1.3244267101766517e-05, "loss": 2.9204164505004884, "step": 41900 }, { "epoch": 0.338303076289725, "grad_norm": 0.5984979867935181, "learning_rate": 1.3242651632028305e-05, "loss": 2.5345325469970703, "step": 41910 }, { "epoch": 0.33838379761549203, "grad_norm": 0.8855619430541992, "learning_rate": 1.324103616229009e-05, "loss": 2.7091203689575196, "step": 41920 }, { "epoch": 0.3384645189412591, "grad_norm": 0.7735236883163452, "learning_rate": 1.3239420692551878e-05, "loss": 2.9590784072875977, "step": 41930 }, { "epoch": 0.33854524026702615, "grad_norm": 0.9405710101127625, "learning_rate": 1.3237805222813664e-05, "loss": 2.7985639572143555, "step": 41940 }, { "epoch": 0.3386259615927932, "grad_norm": 1.2619119882583618, "learning_rate": 1.3236189753075452e-05, "loss": 2.8558963775634765, "step": 41950 }, { "epoch": 0.33870668291856026, "grad_norm": 1.4997996091842651, "learning_rate": 1.3234574283337238e-05, "loss": 3.376568603515625, "step": 41960 }, { "epoch": 0.3387874042443273, "grad_norm": 0.6644211411476135, "learning_rate": 1.3232958813599026e-05, "loss": 2.9336267471313477, "step": 41970 }, { "epoch": 0.3388681255700944, "grad_norm": 1.5167086124420166, "learning_rate": 1.3231343343860812e-05, "loss": 2.8779766082763674, "step": 41980 }, { "epoch": 0.3389488468958614, "grad_norm": 0.962198793888092, "learning_rate": 1.32297278741226e-05, "loss": 2.9025861740112306, "step": 41990 }, { "epoch": 0.3390295682216285, "grad_norm": 0.7801823019981384, "learning_rate": 1.3228112404384385e-05, "loss": 2.8569507598876953, "step": 42000 }, { "epoch": 0.3391102895473955, "grad_norm": 0.6377069354057312, "learning_rate": 1.3226496934646173e-05, "loss": 3.065367317199707, "step": 42010 }, { "epoch": 0.33919101087316256, "grad_norm": 0.9870229363441467, "learning_rate": 1.3224881464907959e-05, "loss": 2.9788930892944334, "step": 42020 }, { "epoch": 0.33927173219892964, "grad_norm": 0.8576526641845703, "learning_rate": 1.3223265995169747e-05, "loss": 2.730718231201172, "step": 42030 }, { "epoch": 0.3393524535246967, "grad_norm": 0.8593153953552246, "learning_rate": 1.3221650525431533e-05, "loss": 2.6921707153320313, "step": 42040 }, { "epoch": 0.33943317485046376, "grad_norm": 1.0757465362548828, "learning_rate": 1.322003505569332e-05, "loss": 2.760764312744141, "step": 42050 }, { "epoch": 0.3395138961762308, "grad_norm": 0.7241760492324829, "learning_rate": 1.3218419585955106e-05, "loss": 2.75118350982666, "step": 42060 }, { "epoch": 0.3395946175019979, "grad_norm": 0.8984029293060303, "learning_rate": 1.3216804116216894e-05, "loss": 2.697312927246094, "step": 42070 }, { "epoch": 0.3396753388277649, "grad_norm": 1.2717334032058716, "learning_rate": 1.321518864647868e-05, "loss": 3.2278457641601563, "step": 42080 }, { "epoch": 0.33975606015353194, "grad_norm": 0.7757812738418579, "learning_rate": 1.3213573176740468e-05, "loss": 3.1369560241699217, "step": 42090 }, { "epoch": 0.339836781479299, "grad_norm": 0.9463124871253967, "learning_rate": 1.3211957707002254e-05, "loss": 3.0035627365112303, "step": 42100 }, { "epoch": 0.33991750280506605, "grad_norm": 0.9367061257362366, "learning_rate": 1.3210342237264042e-05, "loss": 2.9752193450927735, "step": 42110 }, { "epoch": 0.33999822413083314, "grad_norm": 0.6204390525817871, "learning_rate": 1.3208726767525828e-05, "loss": 2.926172637939453, "step": 42120 }, { "epoch": 0.34007894545660017, "grad_norm": 1.6018903255462646, "learning_rate": 1.3207111297787615e-05, "loss": 3.194347953796387, "step": 42130 }, { "epoch": 0.34015966678236725, "grad_norm": 0.9727474451065063, "learning_rate": 1.3205495828049401e-05, "loss": 2.6514423370361326, "step": 42140 }, { "epoch": 0.3402403881081343, "grad_norm": 0.7580336332321167, "learning_rate": 1.3203880358311189e-05, "loss": 3.7468441009521483, "step": 42150 }, { "epoch": 0.34032110943390137, "grad_norm": 0.8397659063339233, "learning_rate": 1.3202264888572975e-05, "loss": 2.946888732910156, "step": 42160 }, { "epoch": 0.3404018307596684, "grad_norm": 1.0585800409317017, "learning_rate": 1.3200649418834763e-05, "loss": 2.911367988586426, "step": 42170 }, { "epoch": 0.34048255208543543, "grad_norm": 1.0796771049499512, "learning_rate": 1.3199033949096549e-05, "loss": 2.8070261001586916, "step": 42180 }, { "epoch": 0.3405632734112025, "grad_norm": 1.5487418174743652, "learning_rate": 1.3197418479358336e-05, "loss": 2.919164276123047, "step": 42190 }, { "epoch": 0.34064399473696955, "grad_norm": 1.049548864364624, "learning_rate": 1.3195803009620122e-05, "loss": 2.9242364883422853, "step": 42200 }, { "epoch": 0.34072471606273663, "grad_norm": 0.8017401695251465, "learning_rate": 1.319418753988191e-05, "loss": 2.684337043762207, "step": 42210 }, { "epoch": 0.34080543738850366, "grad_norm": 0.9778508543968201, "learning_rate": 1.3192572070143696e-05, "loss": 3.0403404235839844, "step": 42220 }, { "epoch": 0.34088615871427075, "grad_norm": 0.7989303469657898, "learning_rate": 1.3190956600405485e-05, "loss": 2.967644691467285, "step": 42230 }, { "epoch": 0.3409668800400378, "grad_norm": 1.2871466875076294, "learning_rate": 1.318934113066727e-05, "loss": 2.8698230743408204, "step": 42240 }, { "epoch": 0.3410476013658048, "grad_norm": 0.8724336624145508, "learning_rate": 1.3187725660929059e-05, "loss": 2.9880990982055664, "step": 42250 }, { "epoch": 0.3411283226915719, "grad_norm": 0.7726115584373474, "learning_rate": 1.3186110191190843e-05, "loss": 2.5939178466796875, "step": 42260 }, { "epoch": 0.3412090440173389, "grad_norm": 0.5746327042579651, "learning_rate": 1.3184494721452633e-05, "loss": 3.3068008422851562, "step": 42270 }, { "epoch": 0.341289765343106, "grad_norm": 1.9138273000717163, "learning_rate": 1.3182879251714417e-05, "loss": 3.784516143798828, "step": 42280 }, { "epoch": 0.34137048666887304, "grad_norm": 0.7137435078620911, "learning_rate": 1.3181263781976206e-05, "loss": 2.973794364929199, "step": 42290 }, { "epoch": 0.3414512079946401, "grad_norm": 1.0677411556243896, "learning_rate": 1.317964831223799e-05, "loss": 2.9917137145996096, "step": 42300 }, { "epoch": 0.34153192932040716, "grad_norm": 0.9407228827476501, "learning_rate": 1.317803284249978e-05, "loss": 3.105458641052246, "step": 42310 }, { "epoch": 0.3416126506461742, "grad_norm": 0.9700747728347778, "learning_rate": 1.3176417372761564e-05, "loss": 3.289760208129883, "step": 42320 }, { "epoch": 0.3416933719719413, "grad_norm": 0.7142617106437683, "learning_rate": 1.3174801903023354e-05, "loss": 2.6676250457763673, "step": 42330 }, { "epoch": 0.3417740932977083, "grad_norm": 1.448683261871338, "learning_rate": 1.3173186433285138e-05, "loss": 3.243795394897461, "step": 42340 }, { "epoch": 0.3418548146234754, "grad_norm": 1.1435532569885254, "learning_rate": 1.3171570963546928e-05, "loss": 2.8848665237426756, "step": 42350 }, { "epoch": 0.3419355359492424, "grad_norm": 1.1422696113586426, "learning_rate": 1.3169955493808712e-05, "loss": 2.9787193298339845, "step": 42360 }, { "epoch": 0.3420162572750095, "grad_norm": 0.9230915307998657, "learning_rate": 1.3168340024070501e-05, "loss": 2.8897594451904296, "step": 42370 }, { "epoch": 0.34209697860077654, "grad_norm": 0.8310801982879639, "learning_rate": 1.3166724554332286e-05, "loss": 3.5265888214111327, "step": 42380 }, { "epoch": 0.34217769992654357, "grad_norm": 0.9495733976364136, "learning_rate": 1.3165109084594075e-05, "loss": 2.933579444885254, "step": 42390 }, { "epoch": 0.34225842125231065, "grad_norm": 1.6400090456008911, "learning_rate": 1.3163493614855861e-05, "loss": 3.274734878540039, "step": 42400 }, { "epoch": 0.3423391425780777, "grad_norm": 1.0737773180007935, "learning_rate": 1.3161878145117649e-05, "loss": 2.9722551345825194, "step": 42410 }, { "epoch": 0.34241986390384477, "grad_norm": 1.0443705320358276, "learning_rate": 1.3160262675379435e-05, "loss": 2.885432815551758, "step": 42420 }, { "epoch": 0.3425005852296118, "grad_norm": 0.8710092306137085, "learning_rate": 1.3158647205641222e-05, "loss": 3.193977928161621, "step": 42430 }, { "epoch": 0.3425813065553789, "grad_norm": 1.6663669347763062, "learning_rate": 1.3157031735903008e-05, "loss": 3.463083267211914, "step": 42440 }, { "epoch": 0.3426620278811459, "grad_norm": 0.9170020222663879, "learning_rate": 1.3155416266164796e-05, "loss": 3.5362430572509767, "step": 42450 }, { "epoch": 0.342742749206913, "grad_norm": 0.6854429841041565, "learning_rate": 1.3153800796426582e-05, "loss": 2.8983180999755858, "step": 42460 }, { "epoch": 0.34282347053268003, "grad_norm": 1.0621452331542969, "learning_rate": 1.315218532668837e-05, "loss": 3.1372814178466797, "step": 42470 }, { "epoch": 0.34290419185844706, "grad_norm": 0.6541122794151306, "learning_rate": 1.3150569856950156e-05, "loss": 3.3670639038085937, "step": 42480 }, { "epoch": 0.34298491318421415, "grad_norm": 1.3408845663070679, "learning_rate": 1.3148954387211943e-05, "loss": 2.955250930786133, "step": 42490 }, { "epoch": 0.3430656345099812, "grad_norm": 0.8990605473518372, "learning_rate": 1.314733891747373e-05, "loss": 2.903224563598633, "step": 42500 }, { "epoch": 0.34314635583574826, "grad_norm": 0.9952216148376465, "learning_rate": 1.3145723447735517e-05, "loss": 3.0904565811157227, "step": 42510 }, { "epoch": 0.3432270771615153, "grad_norm": 1.8724051713943481, "learning_rate": 1.3144107977997303e-05, "loss": 2.758757972717285, "step": 42520 }, { "epoch": 0.3433077984872824, "grad_norm": 0.9439573287963867, "learning_rate": 1.314249250825909e-05, "loss": 3.274347686767578, "step": 42530 }, { "epoch": 0.3433885198130494, "grad_norm": 2.0127503871917725, "learning_rate": 1.3140877038520877e-05, "loss": 3.189792251586914, "step": 42540 }, { "epoch": 0.34346924113881644, "grad_norm": 0.9737213253974915, "learning_rate": 1.3139261568782664e-05, "loss": 3.0650936126708985, "step": 42550 }, { "epoch": 0.3435499624645835, "grad_norm": 1.4070281982421875, "learning_rate": 1.313764609904445e-05, "loss": 3.478964996337891, "step": 42560 }, { "epoch": 0.34363068379035056, "grad_norm": 0.6694025993347168, "learning_rate": 1.3136030629306238e-05, "loss": 3.5794937133789064, "step": 42570 }, { "epoch": 0.34371140511611764, "grad_norm": 0.9937965273857117, "learning_rate": 1.3134415159568024e-05, "loss": 2.9373809814453127, "step": 42580 }, { "epoch": 0.34379212644188467, "grad_norm": 0.9899508953094482, "learning_rate": 1.3132799689829812e-05, "loss": 2.670251655578613, "step": 42590 }, { "epoch": 0.34387284776765176, "grad_norm": 1.0262714624404907, "learning_rate": 1.3131184220091598e-05, "loss": 3.0289590835571287, "step": 42600 }, { "epoch": 0.3439535690934188, "grad_norm": 0.9327515959739685, "learning_rate": 1.3129568750353386e-05, "loss": 2.546920585632324, "step": 42610 }, { "epoch": 0.3440342904191858, "grad_norm": 1.2116073369979858, "learning_rate": 1.3127953280615172e-05, "loss": 3.099203109741211, "step": 42620 }, { "epoch": 0.3441150117449529, "grad_norm": 0.8853479027748108, "learning_rate": 1.312633781087696e-05, "loss": 3.079123115539551, "step": 42630 }, { "epoch": 0.34419573307071993, "grad_norm": 0.9097017645835876, "learning_rate": 1.3124722341138745e-05, "loss": 2.6403005599975584, "step": 42640 }, { "epoch": 0.344276454396487, "grad_norm": 0.9699557423591614, "learning_rate": 1.3123106871400533e-05, "loss": 2.7989217758178713, "step": 42650 }, { "epoch": 0.34435717572225405, "grad_norm": 0.9088767766952515, "learning_rate": 1.3121491401662319e-05, "loss": 2.8257604598999024, "step": 42660 }, { "epoch": 0.34443789704802114, "grad_norm": 0.8902788758277893, "learning_rate": 1.3119875931924107e-05, "loss": 2.7746255874633787, "step": 42670 }, { "epoch": 0.34451861837378817, "grad_norm": 1.4175416231155396, "learning_rate": 1.3118260462185893e-05, "loss": 3.3492034912109374, "step": 42680 }, { "epoch": 0.34459933969955525, "grad_norm": 0.7321488857269287, "learning_rate": 1.311664499244768e-05, "loss": 2.7945322036743163, "step": 42690 }, { "epoch": 0.3446800610253223, "grad_norm": 0.8027909398078918, "learning_rate": 1.3115029522709466e-05, "loss": 3.161532402038574, "step": 42700 }, { "epoch": 0.3447607823510893, "grad_norm": 0.7947239279747009, "learning_rate": 1.3113414052971254e-05, "loss": 2.985328483581543, "step": 42710 }, { "epoch": 0.3448415036768564, "grad_norm": 0.7691728472709656, "learning_rate": 1.311179858323304e-05, "loss": 2.9544925689697266, "step": 42720 }, { "epoch": 0.34492222500262343, "grad_norm": 0.9939761161804199, "learning_rate": 1.3110183113494828e-05, "loss": 2.9556182861328124, "step": 42730 }, { "epoch": 0.3450029463283905, "grad_norm": 0.9204154014587402, "learning_rate": 1.3108567643756614e-05, "loss": 2.836721992492676, "step": 42740 }, { "epoch": 0.34508366765415754, "grad_norm": 0.8328652381896973, "learning_rate": 1.3106952174018401e-05, "loss": 2.6871463775634767, "step": 42750 }, { "epoch": 0.34516438897992463, "grad_norm": 0.8684466481208801, "learning_rate": 1.3105336704280187e-05, "loss": 3.041608428955078, "step": 42760 }, { "epoch": 0.34524511030569166, "grad_norm": 0.9312137365341187, "learning_rate": 1.3103721234541975e-05, "loss": 2.9969749450683594, "step": 42770 }, { "epoch": 0.3453258316314587, "grad_norm": 0.9075160026550293, "learning_rate": 1.3102105764803761e-05, "loss": 2.6757534027099608, "step": 42780 }, { "epoch": 0.3454065529572258, "grad_norm": 1.0672211647033691, "learning_rate": 1.3100490295065549e-05, "loss": 2.9914283752441406, "step": 42790 }, { "epoch": 0.3454872742829928, "grad_norm": 1.1513019800186157, "learning_rate": 1.3098874825327335e-05, "loss": 2.7058526992797853, "step": 42800 }, { "epoch": 0.3455679956087599, "grad_norm": 0.7593165040016174, "learning_rate": 1.3097259355589122e-05, "loss": 3.3804447174072267, "step": 42810 }, { "epoch": 0.3456487169345269, "grad_norm": 1.0911414623260498, "learning_rate": 1.3095643885850908e-05, "loss": 2.7583051681518556, "step": 42820 }, { "epoch": 0.345729438260294, "grad_norm": 0.9072404503822327, "learning_rate": 1.3094028416112696e-05, "loss": 2.6254112243652346, "step": 42830 }, { "epoch": 0.34581015958606104, "grad_norm": 0.8856856822967529, "learning_rate": 1.3092412946374482e-05, "loss": 2.8412004470825196, "step": 42840 }, { "epoch": 0.34589088091182807, "grad_norm": 1.13411545753479, "learning_rate": 1.309079747663627e-05, "loss": 2.8956314086914063, "step": 42850 }, { "epoch": 0.34597160223759515, "grad_norm": 1.4119579792022705, "learning_rate": 1.3089182006898057e-05, "loss": 3.0690639495849608, "step": 42860 }, { "epoch": 0.3460523235633622, "grad_norm": 1.0607521533966064, "learning_rate": 1.3087566537159843e-05, "loss": 2.5420251846313477, "step": 42870 }, { "epoch": 0.34613304488912927, "grad_norm": 1.0828781127929688, "learning_rate": 1.3085951067421631e-05, "loss": 2.894725799560547, "step": 42880 }, { "epoch": 0.3462137662148963, "grad_norm": 0.9246758818626404, "learning_rate": 1.3084335597683417e-05, "loss": 2.592910957336426, "step": 42890 }, { "epoch": 0.3462944875406634, "grad_norm": 0.8138593435287476, "learning_rate": 1.3082720127945205e-05, "loss": 2.8605278015136717, "step": 42900 }, { "epoch": 0.3463752088664304, "grad_norm": 1.1124393939971924, "learning_rate": 1.3081104658206991e-05, "loss": 3.1673818588256837, "step": 42910 }, { "epoch": 0.3464559301921975, "grad_norm": 1.104857325553894, "learning_rate": 1.3079489188468779e-05, "loss": 3.1018672943115235, "step": 42920 }, { "epoch": 0.34653665151796453, "grad_norm": 0.8333137035369873, "learning_rate": 1.3077873718730565e-05, "loss": 3.031203269958496, "step": 42930 }, { "epoch": 0.34661737284373156, "grad_norm": 0.6093696355819702, "learning_rate": 1.3076258248992352e-05, "loss": 3.1363706588745117, "step": 42940 }, { "epoch": 0.34669809416949865, "grad_norm": 0.99817955493927, "learning_rate": 1.3074642779254138e-05, "loss": 4.010385894775391, "step": 42950 }, { "epoch": 0.3467788154952657, "grad_norm": 0.7718906402587891, "learning_rate": 1.3073027309515926e-05, "loss": 2.8774864196777346, "step": 42960 }, { "epoch": 0.34685953682103277, "grad_norm": 0.8264800906181335, "learning_rate": 1.3071411839777712e-05, "loss": 2.579971504211426, "step": 42970 }, { "epoch": 0.3469402581467998, "grad_norm": 0.7760770916938782, "learning_rate": 1.30697963700395e-05, "loss": 3.2429298400878905, "step": 42980 }, { "epoch": 0.3470209794725669, "grad_norm": 1.1382777690887451, "learning_rate": 1.3068180900301286e-05, "loss": 2.6832571029663086, "step": 42990 }, { "epoch": 0.3471017007983339, "grad_norm": 0.6901029944419861, "learning_rate": 1.3066565430563073e-05, "loss": 3.0304542541503907, "step": 43000 }, { "epoch": 0.34718242212410094, "grad_norm": 1.045964002609253, "learning_rate": 1.306494996082486e-05, "loss": 2.985488700866699, "step": 43010 }, { "epoch": 0.34726314344986803, "grad_norm": 1.1203978061676025, "learning_rate": 1.3063334491086647e-05, "loss": 3.274608612060547, "step": 43020 }, { "epoch": 0.34734386477563506, "grad_norm": 1.03663969039917, "learning_rate": 1.3061719021348433e-05, "loss": 2.957929229736328, "step": 43030 }, { "epoch": 0.34742458610140214, "grad_norm": 0.9283379316329956, "learning_rate": 1.306010355161022e-05, "loss": 2.840282440185547, "step": 43040 }, { "epoch": 0.3475053074271692, "grad_norm": 0.9856231212615967, "learning_rate": 1.3058488081872007e-05, "loss": 3.5891178131103514, "step": 43050 }, { "epoch": 0.34758602875293626, "grad_norm": 0.9319584369659424, "learning_rate": 1.3056872612133794e-05, "loss": 3.089089775085449, "step": 43060 }, { "epoch": 0.3476667500787033, "grad_norm": 0.6775723099708557, "learning_rate": 1.305525714239558e-05, "loss": 3.1443592071533204, "step": 43070 }, { "epoch": 0.3477474714044703, "grad_norm": 1.383227825164795, "learning_rate": 1.3053641672657368e-05, "loss": 3.100448226928711, "step": 43080 }, { "epoch": 0.3478281927302374, "grad_norm": 1.0421726703643799, "learning_rate": 1.3052026202919154e-05, "loss": 2.755861282348633, "step": 43090 }, { "epoch": 0.34790891405600444, "grad_norm": 1.002778172492981, "learning_rate": 1.3050410733180943e-05, "loss": 2.757614517211914, "step": 43100 }, { "epoch": 0.3479896353817715, "grad_norm": 0.6193873882293701, "learning_rate": 1.3048795263442728e-05, "loss": 3.048894691467285, "step": 43110 }, { "epoch": 0.34807035670753855, "grad_norm": 0.5808237791061401, "learning_rate": 1.3047179793704517e-05, "loss": 2.9058773040771486, "step": 43120 }, { "epoch": 0.34815107803330564, "grad_norm": 0.9791346192359924, "learning_rate": 1.3045564323966301e-05, "loss": 2.9324033737182615, "step": 43130 }, { "epoch": 0.34823179935907267, "grad_norm": 0.8704440593719482, "learning_rate": 1.3043948854228091e-05, "loss": 3.1689252853393555, "step": 43140 }, { "epoch": 0.34831252068483975, "grad_norm": 1.2117180824279785, "learning_rate": 1.3042333384489875e-05, "loss": 3.021185111999512, "step": 43150 }, { "epoch": 0.3483932420106068, "grad_norm": 0.6242602467536926, "learning_rate": 1.3040717914751665e-05, "loss": 3.0169612884521486, "step": 43160 }, { "epoch": 0.3484739633363738, "grad_norm": 0.630103588104248, "learning_rate": 1.3039102445013449e-05, "loss": 3.0772871017456054, "step": 43170 }, { "epoch": 0.3485546846621409, "grad_norm": 0.7061903476715088, "learning_rate": 1.3037486975275238e-05, "loss": 3.0601341247558596, "step": 43180 }, { "epoch": 0.34863540598790793, "grad_norm": 0.812173068523407, "learning_rate": 1.3035871505537023e-05, "loss": 3.0953289031982423, "step": 43190 }, { "epoch": 0.348716127313675, "grad_norm": 0.9885363578796387, "learning_rate": 1.3034256035798812e-05, "loss": 2.5480955123901365, "step": 43200 }, { "epoch": 0.34879684863944205, "grad_norm": 0.528232991695404, "learning_rate": 1.3032640566060596e-05, "loss": 3.4265369415283202, "step": 43210 }, { "epoch": 0.34887756996520913, "grad_norm": 0.8360686302185059, "learning_rate": 1.3031025096322386e-05, "loss": 2.804627227783203, "step": 43220 }, { "epoch": 0.34895829129097616, "grad_norm": 0.8922044634819031, "learning_rate": 1.302940962658417e-05, "loss": 3.651660919189453, "step": 43230 }, { "epoch": 0.3490390126167432, "grad_norm": 0.9730062484741211, "learning_rate": 1.302779415684596e-05, "loss": 3.0105934143066406, "step": 43240 }, { "epoch": 0.3491197339425103, "grad_norm": 4.334824085235596, "learning_rate": 1.3026178687107744e-05, "loss": 3.575083541870117, "step": 43250 }, { "epoch": 0.3492004552682773, "grad_norm": 1.103144645690918, "learning_rate": 1.3024563217369533e-05, "loss": 2.744089698791504, "step": 43260 }, { "epoch": 0.3492811765940444, "grad_norm": 1.6191948652267456, "learning_rate": 1.3022947747631319e-05, "loss": 3.1265380859375, "step": 43270 }, { "epoch": 0.3493618979198114, "grad_norm": 1.2505062818527222, "learning_rate": 1.3021332277893107e-05, "loss": 3.276647186279297, "step": 43280 }, { "epoch": 0.3494426192455785, "grad_norm": 0.7168669104576111, "learning_rate": 1.3019716808154893e-05, "loss": 2.716192436218262, "step": 43290 }, { "epoch": 0.34952334057134554, "grad_norm": 0.7063372731208801, "learning_rate": 1.301810133841668e-05, "loss": 2.957616996765137, "step": 43300 }, { "epoch": 0.3496040618971126, "grad_norm": 0.635543942451477, "learning_rate": 1.3016485868678466e-05, "loss": 2.929248809814453, "step": 43310 }, { "epoch": 0.34968478322287966, "grad_norm": 0.9584351778030396, "learning_rate": 1.3014870398940254e-05, "loss": 2.908070945739746, "step": 43320 }, { "epoch": 0.3497655045486467, "grad_norm": 2.415250062942505, "learning_rate": 1.301325492920204e-05, "loss": 3.079290199279785, "step": 43330 }, { "epoch": 0.3498462258744138, "grad_norm": 0.9535290598869324, "learning_rate": 1.3011639459463828e-05, "loss": 2.8073619842529296, "step": 43340 }, { "epoch": 0.3499269472001808, "grad_norm": 0.9431089162826538, "learning_rate": 1.3010023989725614e-05, "loss": 2.8658071517944337, "step": 43350 }, { "epoch": 0.3500076685259479, "grad_norm": 0.4980735778808594, "learning_rate": 1.3008408519987401e-05, "loss": 2.5136541366577148, "step": 43360 }, { "epoch": 0.3500883898517149, "grad_norm": 0.857661247253418, "learning_rate": 1.3006793050249187e-05, "loss": 2.8676586151123047, "step": 43370 }, { "epoch": 0.350169111177482, "grad_norm": 1.3376595973968506, "learning_rate": 1.3005177580510975e-05, "loss": 2.6923831939697265, "step": 43380 }, { "epoch": 0.35024983250324904, "grad_norm": 0.9745600819587708, "learning_rate": 1.3003562110772761e-05, "loss": 3.2512466430664064, "step": 43390 }, { "epoch": 0.35033055382901607, "grad_norm": 0.750434398651123, "learning_rate": 1.3001946641034549e-05, "loss": 2.9633161544799806, "step": 43400 }, { "epoch": 0.35041127515478315, "grad_norm": 0.8271122574806213, "learning_rate": 1.3000331171296335e-05, "loss": 2.828756332397461, "step": 43410 }, { "epoch": 0.3504919964805502, "grad_norm": 1.0302016735076904, "learning_rate": 1.2998715701558123e-05, "loss": 3.19211483001709, "step": 43420 }, { "epoch": 0.35057271780631727, "grad_norm": 0.6818568110466003, "learning_rate": 1.2997100231819909e-05, "loss": 2.7590909957885743, "step": 43430 }, { "epoch": 0.3506534391320843, "grad_norm": 0.6783785223960876, "learning_rate": 1.2995484762081696e-05, "loss": 2.791286659240723, "step": 43440 }, { "epoch": 0.3507341604578514, "grad_norm": 4.45966100692749, "learning_rate": 1.2993869292343482e-05, "loss": 3.7993194580078127, "step": 43450 }, { "epoch": 0.3508148817836184, "grad_norm": 1.0604631900787354, "learning_rate": 1.299225382260527e-05, "loss": 3.0724418640136717, "step": 43460 }, { "epoch": 0.35089560310938545, "grad_norm": 0.8629642128944397, "learning_rate": 1.2990638352867056e-05, "loss": 2.9996252059936523, "step": 43470 }, { "epoch": 0.35097632443515253, "grad_norm": 1.4212472438812256, "learning_rate": 1.2989022883128844e-05, "loss": 3.182517433166504, "step": 43480 }, { "epoch": 0.35105704576091956, "grad_norm": 0.8774269819259644, "learning_rate": 1.298740741339063e-05, "loss": 2.855936622619629, "step": 43490 }, { "epoch": 0.35113776708668665, "grad_norm": 1.3395553827285767, "learning_rate": 1.2985791943652417e-05, "loss": 2.966412162780762, "step": 43500 }, { "epoch": 0.3512184884124537, "grad_norm": 0.863042414188385, "learning_rate": 1.2984176473914203e-05, "loss": 3.4762451171875, "step": 43510 }, { "epoch": 0.35129920973822076, "grad_norm": 1.1552045345306396, "learning_rate": 1.2982561004175991e-05, "loss": 3.2118450164794923, "step": 43520 }, { "epoch": 0.3513799310639878, "grad_norm": 1.1831775903701782, "learning_rate": 1.2980945534437777e-05, "loss": 2.7727224349975588, "step": 43530 }, { "epoch": 0.3514606523897548, "grad_norm": 0.8651805520057678, "learning_rate": 1.2979330064699565e-05, "loss": 3.118296813964844, "step": 43540 }, { "epoch": 0.3515413737155219, "grad_norm": 1.027266263961792, "learning_rate": 1.297771459496135e-05, "loss": 3.1823205947875977, "step": 43550 }, { "epoch": 0.35162209504128894, "grad_norm": 0.5513719320297241, "learning_rate": 1.2976099125223138e-05, "loss": 2.9519378662109377, "step": 43560 }, { "epoch": 0.351702816367056, "grad_norm": 1.4241946935653687, "learning_rate": 1.2974483655484924e-05, "loss": 3.0623729705810545, "step": 43570 }, { "epoch": 0.35178353769282306, "grad_norm": 1.072839379310608, "learning_rate": 1.2972868185746712e-05, "loss": 2.5235692977905275, "step": 43580 }, { "epoch": 0.35186425901859014, "grad_norm": 1.054341435432434, "learning_rate": 1.2971252716008498e-05, "loss": 3.0398193359375, "step": 43590 }, { "epoch": 0.3519449803443572, "grad_norm": 0.8155085444450378, "learning_rate": 1.2969637246270286e-05, "loss": 2.808913230895996, "step": 43600 }, { "epoch": 0.3520257016701242, "grad_norm": 0.9643861651420593, "learning_rate": 1.2968021776532072e-05, "loss": 2.9785655975341796, "step": 43610 }, { "epoch": 0.3521064229958913, "grad_norm": 1.0871164798736572, "learning_rate": 1.296640630679386e-05, "loss": 2.8817041397094725, "step": 43620 }, { "epoch": 0.3521871443216583, "grad_norm": 1.2212320566177368, "learning_rate": 1.2964790837055645e-05, "loss": 2.8453025817871094, "step": 43630 }, { "epoch": 0.3522678656474254, "grad_norm": 1.2757542133331299, "learning_rate": 1.2963175367317433e-05, "loss": 3.376668930053711, "step": 43640 }, { "epoch": 0.35234858697319243, "grad_norm": 0.797660768032074, "learning_rate": 1.296155989757922e-05, "loss": 2.8886234283447267, "step": 43650 }, { "epoch": 0.3524293082989595, "grad_norm": 1.4962838888168335, "learning_rate": 1.2959944427841007e-05, "loss": 3.204361343383789, "step": 43660 }, { "epoch": 0.35251002962472655, "grad_norm": 0.9250943064689636, "learning_rate": 1.2958328958102793e-05, "loss": 3.2816020965576174, "step": 43670 }, { "epoch": 0.35259075095049364, "grad_norm": 1.67978036403656, "learning_rate": 1.295671348836458e-05, "loss": 3.312448501586914, "step": 43680 }, { "epoch": 0.35267147227626067, "grad_norm": 1.7530949115753174, "learning_rate": 1.2955098018626367e-05, "loss": 3.1493186950683594, "step": 43690 }, { "epoch": 0.3527521936020277, "grad_norm": 1.188023567199707, "learning_rate": 1.2953482548888154e-05, "loss": 2.6313623428344726, "step": 43700 }, { "epoch": 0.3528329149277948, "grad_norm": 0.5236805081367493, "learning_rate": 1.295186707914994e-05, "loss": 2.923168182373047, "step": 43710 }, { "epoch": 0.3529136362535618, "grad_norm": 0.9830164909362793, "learning_rate": 1.2950251609411728e-05, "loss": 2.7856895446777346, "step": 43720 }, { "epoch": 0.3529943575793289, "grad_norm": 0.7860188484191895, "learning_rate": 1.2948636139673514e-05, "loss": 2.743062973022461, "step": 43730 }, { "epoch": 0.35307507890509593, "grad_norm": 0.9455174207687378, "learning_rate": 1.2947020669935302e-05, "loss": 2.683602142333984, "step": 43740 }, { "epoch": 0.353155800230863, "grad_norm": 0.9391053318977356, "learning_rate": 1.2945405200197088e-05, "loss": 3.2484779357910156, "step": 43750 }, { "epoch": 0.35323652155663005, "grad_norm": 0.838278591632843, "learning_rate": 1.2943789730458875e-05, "loss": 3.2063560485839844, "step": 43760 }, { "epoch": 0.3533172428823971, "grad_norm": 0.7230780124664307, "learning_rate": 1.2942174260720661e-05, "loss": 3.3018463134765623, "step": 43770 }, { "epoch": 0.35339796420816416, "grad_norm": 1.1306378841400146, "learning_rate": 1.2940558790982449e-05, "loss": 3.064803886413574, "step": 43780 }, { "epoch": 0.3534786855339312, "grad_norm": 2.0488967895507812, "learning_rate": 1.2938943321244235e-05, "loss": 3.4246337890625, "step": 43790 }, { "epoch": 0.3535594068596983, "grad_norm": 0.6934654712677002, "learning_rate": 1.2937327851506023e-05, "loss": 2.801848602294922, "step": 43800 }, { "epoch": 0.3536401281854653, "grad_norm": 0.6308866739273071, "learning_rate": 1.2935712381767809e-05, "loss": 3.0917407989501955, "step": 43810 }, { "epoch": 0.3537208495112324, "grad_norm": 0.6386212706565857, "learning_rate": 1.2934096912029596e-05, "loss": 2.981935501098633, "step": 43820 }, { "epoch": 0.3538015708369994, "grad_norm": 1.2728228569030762, "learning_rate": 1.2932481442291382e-05, "loss": 2.8221435546875, "step": 43830 }, { "epoch": 0.35388229216276645, "grad_norm": 0.7481537461280823, "learning_rate": 1.293086597255317e-05, "loss": 3.126748275756836, "step": 43840 }, { "epoch": 0.35396301348853354, "grad_norm": 0.6943281888961792, "learning_rate": 1.2929250502814956e-05, "loss": 3.2431983947753906, "step": 43850 }, { "epoch": 0.35404373481430057, "grad_norm": 0.6448924541473389, "learning_rate": 1.2927635033076744e-05, "loss": 3.220195007324219, "step": 43860 }, { "epoch": 0.35412445614006766, "grad_norm": 1.7082610130310059, "learning_rate": 1.292601956333853e-05, "loss": 3.085975456237793, "step": 43870 }, { "epoch": 0.3542051774658347, "grad_norm": 0.8219209313392639, "learning_rate": 1.2924404093600317e-05, "loss": 2.734169769287109, "step": 43880 }, { "epoch": 0.35428589879160177, "grad_norm": 0.9638783931732178, "learning_rate": 1.2922788623862103e-05, "loss": 2.4025596618652343, "step": 43890 }, { "epoch": 0.3543666201173688, "grad_norm": 0.7760400772094727, "learning_rate": 1.2921173154123891e-05, "loss": 2.5681011199951174, "step": 43900 }, { "epoch": 0.3544473414431359, "grad_norm": 0.9018696546554565, "learning_rate": 1.2919557684385677e-05, "loss": 2.503396224975586, "step": 43910 }, { "epoch": 0.3545280627689029, "grad_norm": 0.9963418841362, "learning_rate": 1.2917942214647465e-05, "loss": 2.6856782913208006, "step": 43920 }, { "epoch": 0.35460878409466995, "grad_norm": 1.4052133560180664, "learning_rate": 1.291632674490925e-05, "loss": 2.795301818847656, "step": 43930 }, { "epoch": 0.35468950542043703, "grad_norm": 0.9188799262046814, "learning_rate": 1.2914711275171039e-05, "loss": 2.7168899536132813, "step": 43940 }, { "epoch": 0.35477022674620406, "grad_norm": 0.8327363133430481, "learning_rate": 1.2913095805432825e-05, "loss": 3.1880939483642576, "step": 43950 }, { "epoch": 0.35485094807197115, "grad_norm": 0.7310583591461182, "learning_rate": 1.2911480335694612e-05, "loss": 2.917524528503418, "step": 43960 }, { "epoch": 0.3549316693977382, "grad_norm": 0.6752335429191589, "learning_rate": 1.2909864865956398e-05, "loss": 3.5356082916259766, "step": 43970 }, { "epoch": 0.35501239072350527, "grad_norm": 1.1641486883163452, "learning_rate": 1.2908249396218186e-05, "loss": 2.8586549758911133, "step": 43980 }, { "epoch": 0.3550931120492723, "grad_norm": 0.8621492385864258, "learning_rate": 1.2906633926479972e-05, "loss": 3.140880584716797, "step": 43990 }, { "epoch": 0.3551738333750393, "grad_norm": 1.0638948678970337, "learning_rate": 1.290501845674176e-05, "loss": 3.13907413482666, "step": 44000 }, { "epoch": 0.3552545547008064, "grad_norm": 1.8610568046569824, "learning_rate": 1.2903402987003546e-05, "loss": 2.7191421508789064, "step": 44010 }, { "epoch": 0.35533527602657344, "grad_norm": 0.8137440085411072, "learning_rate": 1.2901787517265333e-05, "loss": 3.0165594100952147, "step": 44020 }, { "epoch": 0.35541599735234053, "grad_norm": 0.8186131715774536, "learning_rate": 1.290017204752712e-05, "loss": 2.6108013153076173, "step": 44030 }, { "epoch": 0.35549671867810756, "grad_norm": 0.6032033562660217, "learning_rate": 1.2898556577788907e-05, "loss": 2.7924631118774412, "step": 44040 }, { "epoch": 0.35557744000387465, "grad_norm": 0.9638074040412903, "learning_rate": 1.2896941108050693e-05, "loss": 3.1137319564819337, "step": 44050 }, { "epoch": 0.3556581613296417, "grad_norm": 1.1501681804656982, "learning_rate": 1.289532563831248e-05, "loss": 3.2787395477294923, "step": 44060 }, { "epoch": 0.3557388826554087, "grad_norm": 0.7360047101974487, "learning_rate": 1.2893710168574267e-05, "loss": 2.939897155761719, "step": 44070 }, { "epoch": 0.3558196039811758, "grad_norm": 0.7903885841369629, "learning_rate": 1.2892094698836054e-05, "loss": 3.1809743881225585, "step": 44080 }, { "epoch": 0.3559003253069428, "grad_norm": 0.8226714730262756, "learning_rate": 1.289047922909784e-05, "loss": 3.0097539901733397, "step": 44090 }, { "epoch": 0.3559810466327099, "grad_norm": 1.1590524911880493, "learning_rate": 1.2888863759359628e-05, "loss": 2.9243112564086915, "step": 44100 }, { "epoch": 0.35606176795847694, "grad_norm": 0.8296331167221069, "learning_rate": 1.2887248289621417e-05, "loss": 3.130074882507324, "step": 44110 }, { "epoch": 0.356142489284244, "grad_norm": 1.0605720281600952, "learning_rate": 1.2885632819883202e-05, "loss": 2.805113410949707, "step": 44120 }, { "epoch": 0.35622321061001105, "grad_norm": 0.8528913855552673, "learning_rate": 1.2884017350144991e-05, "loss": 3.787293243408203, "step": 44130 }, { "epoch": 0.35630393193577814, "grad_norm": 1.1598345041275024, "learning_rate": 1.2882401880406777e-05, "loss": 3.053982734680176, "step": 44140 }, { "epoch": 0.35638465326154517, "grad_norm": 1.0712867975234985, "learning_rate": 1.2880786410668565e-05, "loss": 3.118330955505371, "step": 44150 }, { "epoch": 0.3564653745873122, "grad_norm": 1.2137054204940796, "learning_rate": 1.287917094093035e-05, "loss": 3.5049461364746093, "step": 44160 }, { "epoch": 0.3565460959130793, "grad_norm": 1.0466079711914062, "learning_rate": 1.2877555471192139e-05, "loss": 3.2865440368652346, "step": 44170 }, { "epoch": 0.3566268172388463, "grad_norm": 1.119688630104065, "learning_rate": 1.2875940001453925e-05, "loss": 2.6595922470092774, "step": 44180 }, { "epoch": 0.3567075385646134, "grad_norm": 1.5351191759109497, "learning_rate": 1.2874324531715712e-05, "loss": 2.592513847351074, "step": 44190 }, { "epoch": 0.35678825989038043, "grad_norm": 0.7669406533241272, "learning_rate": 1.2872709061977498e-05, "loss": 3.115277099609375, "step": 44200 }, { "epoch": 0.3568689812161475, "grad_norm": 1.2385462522506714, "learning_rate": 1.2871093592239286e-05, "loss": 3.2977256774902344, "step": 44210 }, { "epoch": 0.35694970254191455, "grad_norm": 1.1042768955230713, "learning_rate": 1.2869478122501072e-05, "loss": 2.73659610748291, "step": 44220 }, { "epoch": 0.3570304238676816, "grad_norm": 1.4154446125030518, "learning_rate": 1.286786265276286e-05, "loss": 2.5776885986328124, "step": 44230 }, { "epoch": 0.35711114519344866, "grad_norm": 0.9992244243621826, "learning_rate": 1.2866247183024646e-05, "loss": 3.0654525756835938, "step": 44240 }, { "epoch": 0.3571918665192157, "grad_norm": 0.9101970195770264, "learning_rate": 1.2864631713286433e-05, "loss": 2.503958320617676, "step": 44250 }, { "epoch": 0.3572725878449828, "grad_norm": 0.9927441477775574, "learning_rate": 1.286301624354822e-05, "loss": 2.8394144058227537, "step": 44260 }, { "epoch": 0.3573533091707498, "grad_norm": 0.9373641610145569, "learning_rate": 1.2861400773810007e-05, "loss": 3.348602294921875, "step": 44270 }, { "epoch": 0.3574340304965169, "grad_norm": 0.9166012406349182, "learning_rate": 1.2859785304071793e-05, "loss": 3.004804801940918, "step": 44280 }, { "epoch": 0.3575147518222839, "grad_norm": 0.725142776966095, "learning_rate": 1.285816983433358e-05, "loss": 3.162653350830078, "step": 44290 }, { "epoch": 0.35759547314805096, "grad_norm": 1.2504006624221802, "learning_rate": 1.2856554364595367e-05, "loss": 3.4264995574951174, "step": 44300 }, { "epoch": 0.35767619447381804, "grad_norm": 1.2722376585006714, "learning_rate": 1.2854938894857154e-05, "loss": 3.089817237854004, "step": 44310 }, { "epoch": 0.3577569157995851, "grad_norm": 0.9744825959205627, "learning_rate": 1.285332342511894e-05, "loss": 2.8801483154296874, "step": 44320 }, { "epoch": 0.35783763712535216, "grad_norm": 0.8551511764526367, "learning_rate": 1.2851707955380728e-05, "loss": 2.9128337860107423, "step": 44330 }, { "epoch": 0.3579183584511192, "grad_norm": 1.2649009227752686, "learning_rate": 1.2850092485642514e-05, "loss": 2.987065315246582, "step": 44340 }, { "epoch": 0.3579990797768863, "grad_norm": 0.8054251670837402, "learning_rate": 1.2848477015904302e-05, "loss": 3.235342025756836, "step": 44350 }, { "epoch": 0.3580798011026533, "grad_norm": 1.50320565700531, "learning_rate": 1.2846861546166088e-05, "loss": 3.075356101989746, "step": 44360 }, { "epoch": 0.3581605224284204, "grad_norm": 0.7892515063285828, "learning_rate": 1.2845246076427875e-05, "loss": 2.886713981628418, "step": 44370 }, { "epoch": 0.3582412437541874, "grad_norm": 0.8729177713394165, "learning_rate": 1.2843630606689661e-05, "loss": 2.6917638778686523, "step": 44380 }, { "epoch": 0.35832196507995445, "grad_norm": 0.6933605074882507, "learning_rate": 1.2842015136951449e-05, "loss": 3.1496057510375977, "step": 44390 }, { "epoch": 0.35840268640572154, "grad_norm": 1.0627208948135376, "learning_rate": 1.2840399667213235e-05, "loss": 2.9758108139038084, "step": 44400 }, { "epoch": 0.35848340773148857, "grad_norm": 0.7020359635353088, "learning_rate": 1.2838784197475023e-05, "loss": 3.2860321044921874, "step": 44410 }, { "epoch": 0.35856412905725565, "grad_norm": 0.7235882878303528, "learning_rate": 1.2837168727736809e-05, "loss": 2.5724321365356446, "step": 44420 }, { "epoch": 0.3586448503830227, "grad_norm": 1.2221570014953613, "learning_rate": 1.2835553257998597e-05, "loss": 2.8396039962768556, "step": 44430 }, { "epoch": 0.35872557170878977, "grad_norm": 1.0887200832366943, "learning_rate": 1.2833937788260383e-05, "loss": 3.155194854736328, "step": 44440 }, { "epoch": 0.3588062930345568, "grad_norm": 0.5694950819015503, "learning_rate": 1.283232231852217e-05, "loss": 3.0161888122558596, "step": 44450 }, { "epoch": 0.35888701436032383, "grad_norm": 1.1756376028060913, "learning_rate": 1.2830706848783956e-05, "loss": 2.723470687866211, "step": 44460 }, { "epoch": 0.3589677356860909, "grad_norm": 1.212538242340088, "learning_rate": 1.2829091379045744e-05, "loss": 2.782612609863281, "step": 44470 }, { "epoch": 0.35904845701185795, "grad_norm": 0.8203389644622803, "learning_rate": 1.282747590930753e-05, "loss": 3.228408432006836, "step": 44480 }, { "epoch": 0.35912917833762503, "grad_norm": 0.7855175733566284, "learning_rate": 1.2825860439569318e-05, "loss": 2.4953645706176757, "step": 44490 }, { "epoch": 0.35920989966339206, "grad_norm": 0.6758949160575867, "learning_rate": 1.2824244969831104e-05, "loss": 2.979949188232422, "step": 44500 }, { "epoch": 0.35929062098915915, "grad_norm": 0.6805091500282288, "learning_rate": 1.2822629500092891e-05, "loss": 2.792055130004883, "step": 44510 }, { "epoch": 0.3593713423149262, "grad_norm": 1.094803810119629, "learning_rate": 1.2821014030354677e-05, "loss": 3.1840356826782226, "step": 44520 }, { "epoch": 0.3594520636406932, "grad_norm": 1.0152528285980225, "learning_rate": 1.2819398560616465e-05, "loss": 2.5853944778442384, "step": 44530 }, { "epoch": 0.3595327849664603, "grad_norm": 0.8704527616500854, "learning_rate": 1.2817783090878251e-05, "loss": 3.314780426025391, "step": 44540 }, { "epoch": 0.3596135062922273, "grad_norm": 0.7495918273925781, "learning_rate": 1.2816167621140039e-05, "loss": 2.930889129638672, "step": 44550 }, { "epoch": 0.3596942276179944, "grad_norm": 1.1397379636764526, "learning_rate": 1.2814552151401825e-05, "loss": 2.9362262725830077, "step": 44560 }, { "epoch": 0.35977494894376144, "grad_norm": 0.7876633405685425, "learning_rate": 1.2812936681663612e-05, "loss": 2.7764848709106444, "step": 44570 }, { "epoch": 0.3598556702695285, "grad_norm": 0.6409212946891785, "learning_rate": 1.2811321211925398e-05, "loss": 2.9598302841186523, "step": 44580 }, { "epoch": 0.35993639159529556, "grad_norm": 1.0083744525909424, "learning_rate": 1.2809705742187186e-05, "loss": 2.7214988708496093, "step": 44590 }, { "epoch": 0.36001711292106264, "grad_norm": 1.3926955461502075, "learning_rate": 1.2808090272448972e-05, "loss": 2.6583583831787108, "step": 44600 }, { "epoch": 0.3600978342468297, "grad_norm": 1.103391170501709, "learning_rate": 1.280647480271076e-05, "loss": 3.0028295516967773, "step": 44610 }, { "epoch": 0.3601785555725967, "grad_norm": 1.1559194326400757, "learning_rate": 1.2804859332972546e-05, "loss": 3.023982620239258, "step": 44620 }, { "epoch": 0.3602592768983638, "grad_norm": 0.7810999751091003, "learning_rate": 1.2803243863234333e-05, "loss": 2.952633857727051, "step": 44630 }, { "epoch": 0.3603399982241308, "grad_norm": 1.1191939115524292, "learning_rate": 1.280162839349612e-05, "loss": 3.374037170410156, "step": 44640 }, { "epoch": 0.3604207195498979, "grad_norm": 1.7128547430038452, "learning_rate": 1.2800012923757907e-05, "loss": 3.416499710083008, "step": 44650 }, { "epoch": 0.36050144087566494, "grad_norm": 0.842979371547699, "learning_rate": 1.2798397454019693e-05, "loss": 3.120039367675781, "step": 44660 }, { "epoch": 0.360582162201432, "grad_norm": 0.8807423114776611, "learning_rate": 1.279678198428148e-05, "loss": 3.098855972290039, "step": 44670 }, { "epoch": 0.36066288352719905, "grad_norm": 0.8475968241691589, "learning_rate": 1.2795166514543267e-05, "loss": 2.913548469543457, "step": 44680 }, { "epoch": 0.3607436048529661, "grad_norm": 0.9289010167121887, "learning_rate": 1.2793551044805054e-05, "loss": 2.724373435974121, "step": 44690 }, { "epoch": 0.36082432617873317, "grad_norm": 0.6698432564735413, "learning_rate": 1.279193557506684e-05, "loss": 3.1862327575683596, "step": 44700 }, { "epoch": 0.3609050475045002, "grad_norm": 0.8795800805091858, "learning_rate": 1.2790320105328628e-05, "loss": 3.3166748046875, "step": 44710 }, { "epoch": 0.3609857688302673, "grad_norm": 0.8377935886383057, "learning_rate": 1.2788704635590414e-05, "loss": 3.033137893676758, "step": 44720 }, { "epoch": 0.3610664901560343, "grad_norm": 0.772520899772644, "learning_rate": 1.2787089165852202e-05, "loss": 3.1125980377197267, "step": 44730 }, { "epoch": 0.3611472114818014, "grad_norm": 1.1684150695800781, "learning_rate": 1.2785473696113988e-05, "loss": 3.3851165771484375, "step": 44740 }, { "epoch": 0.36122793280756843, "grad_norm": 0.722062349319458, "learning_rate": 1.2783858226375776e-05, "loss": 2.8761228561401366, "step": 44750 }, { "epoch": 0.36130865413333546, "grad_norm": 0.7000346183776855, "learning_rate": 1.2782242756637562e-05, "loss": 2.8979108810424803, "step": 44760 }, { "epoch": 0.36138937545910255, "grad_norm": 1.391666293144226, "learning_rate": 1.278062728689935e-05, "loss": 3.044249725341797, "step": 44770 }, { "epoch": 0.3614700967848696, "grad_norm": 1.2157584428787231, "learning_rate": 1.2779011817161135e-05, "loss": 2.8147531509399415, "step": 44780 }, { "epoch": 0.36155081811063666, "grad_norm": 1.0995655059814453, "learning_rate": 1.2777396347422923e-05, "loss": 2.770001220703125, "step": 44790 }, { "epoch": 0.3616315394364037, "grad_norm": 1.2859386205673218, "learning_rate": 1.2775780877684709e-05, "loss": 3.15991096496582, "step": 44800 }, { "epoch": 0.3617122607621708, "grad_norm": 0.9572939276695251, "learning_rate": 1.2774165407946497e-05, "loss": 3.4336063385009767, "step": 44810 }, { "epoch": 0.3617929820879378, "grad_norm": 1.291579008102417, "learning_rate": 1.2772549938208283e-05, "loss": 2.72445125579834, "step": 44820 }, { "epoch": 0.36187370341370484, "grad_norm": 1.274107813835144, "learning_rate": 1.277093446847007e-05, "loss": 2.7432125091552733, "step": 44830 }, { "epoch": 0.3619544247394719, "grad_norm": 1.5595247745513916, "learning_rate": 1.2769318998731856e-05, "loss": 2.961712646484375, "step": 44840 }, { "epoch": 0.36203514606523896, "grad_norm": 1.0154986381530762, "learning_rate": 1.2767703528993644e-05, "loss": 2.7427623748779295, "step": 44850 }, { "epoch": 0.36211586739100604, "grad_norm": 3.246495008468628, "learning_rate": 1.276608805925543e-05, "loss": 3.5085826873779298, "step": 44860 }, { "epoch": 0.36219658871677307, "grad_norm": 0.7347347736358643, "learning_rate": 1.2764472589517218e-05, "loss": 2.868033790588379, "step": 44870 }, { "epoch": 0.36227731004254016, "grad_norm": 0.8568599820137024, "learning_rate": 1.2762857119779004e-05, "loss": 2.9523706436157227, "step": 44880 }, { "epoch": 0.3623580313683072, "grad_norm": 0.7349877953529358, "learning_rate": 1.2761241650040791e-05, "loss": 2.5916276931762696, "step": 44890 }, { "epoch": 0.3624387526940743, "grad_norm": 0.6806245446205139, "learning_rate": 1.2759626180302577e-05, "loss": 2.9348073959350587, "step": 44900 }, { "epoch": 0.3625194740198413, "grad_norm": 1.1953437328338623, "learning_rate": 1.2758010710564365e-05, "loss": 3.3226795196533203, "step": 44910 }, { "epoch": 0.36260019534560833, "grad_norm": 0.5304166078567505, "learning_rate": 1.2756395240826151e-05, "loss": 3.2276985168457033, "step": 44920 }, { "epoch": 0.3626809166713754, "grad_norm": 0.9157361388206482, "learning_rate": 1.2754779771087939e-05, "loss": 2.888931465148926, "step": 44930 }, { "epoch": 0.36276163799714245, "grad_norm": 0.5192028284072876, "learning_rate": 1.2753164301349725e-05, "loss": 2.913921928405762, "step": 44940 }, { "epoch": 0.36284235932290954, "grad_norm": 1.1763213872909546, "learning_rate": 1.2751548831611512e-05, "loss": 3.1584445953369142, "step": 44950 }, { "epoch": 0.36292308064867657, "grad_norm": 0.7241448163986206, "learning_rate": 1.2749933361873298e-05, "loss": 2.6567415237426757, "step": 44960 }, { "epoch": 0.36300380197444365, "grad_norm": 1.2572661638259888, "learning_rate": 1.2748317892135086e-05, "loss": 2.643614387512207, "step": 44970 }, { "epoch": 0.3630845233002107, "grad_norm": 0.9817606210708618, "learning_rate": 1.2746702422396872e-05, "loss": 2.707121658325195, "step": 44980 }, { "epoch": 0.3631652446259777, "grad_norm": 0.9059287905693054, "learning_rate": 1.274508695265866e-05, "loss": 2.730331039428711, "step": 44990 }, { "epoch": 0.3632459659517448, "grad_norm": 0.6321625113487244, "learning_rate": 1.2743471482920446e-05, "loss": 2.7618698120117187, "step": 45000 }, { "epoch": 0.36332668727751183, "grad_norm": 1.2696563005447388, "learning_rate": 1.2741856013182235e-05, "loss": 3.744430923461914, "step": 45010 }, { "epoch": 0.3634074086032789, "grad_norm": 0.577461838722229, "learning_rate": 1.274024054344402e-05, "loss": 2.999283218383789, "step": 45020 }, { "epoch": 0.36348812992904594, "grad_norm": 0.7610417604446411, "learning_rate": 1.2738625073705809e-05, "loss": 3.1009183883666993, "step": 45030 }, { "epoch": 0.36356885125481303, "grad_norm": 0.8844124674797058, "learning_rate": 1.2737009603967593e-05, "loss": 2.770713233947754, "step": 45040 }, { "epoch": 0.36364957258058006, "grad_norm": 1.0155802965164185, "learning_rate": 1.2735394134229383e-05, "loss": 2.7059070587158205, "step": 45050 }, { "epoch": 0.3637302939063471, "grad_norm": 0.9740664958953857, "learning_rate": 1.2733778664491167e-05, "loss": 2.827471923828125, "step": 45060 }, { "epoch": 0.3638110152321142, "grad_norm": 1.0646865367889404, "learning_rate": 1.2732163194752956e-05, "loss": 2.8336824417114257, "step": 45070 }, { "epoch": 0.3638917365578812, "grad_norm": 0.9409306645393372, "learning_rate": 1.273054772501474e-05, "loss": 3.0833791732788085, "step": 45080 }, { "epoch": 0.3639724578836483, "grad_norm": 0.7103193998336792, "learning_rate": 1.272893225527653e-05, "loss": 2.618613624572754, "step": 45090 }, { "epoch": 0.3640531792094153, "grad_norm": 0.8560624718666077, "learning_rate": 1.2727316785538314e-05, "loss": 3.228626251220703, "step": 45100 }, { "epoch": 0.3641339005351824, "grad_norm": 1.0547837018966675, "learning_rate": 1.2725701315800104e-05, "loss": 3.3041915893554688, "step": 45110 }, { "epoch": 0.36421462186094944, "grad_norm": 0.6519068479537964, "learning_rate": 1.2724085846061888e-05, "loss": 2.397270393371582, "step": 45120 }, { "epoch": 0.3642953431867165, "grad_norm": 0.9948951005935669, "learning_rate": 1.2722470376323677e-05, "loss": 3.0153783798217773, "step": 45130 }, { "epoch": 0.36437606451248356, "grad_norm": 0.9011346697807312, "learning_rate": 1.2720854906585462e-05, "loss": 2.8362197875976562, "step": 45140 }, { "epoch": 0.3644567858382506, "grad_norm": 0.8264791369438171, "learning_rate": 1.2719239436847251e-05, "loss": 2.945512390136719, "step": 45150 }, { "epoch": 0.36453750716401767, "grad_norm": 0.9878186583518982, "learning_rate": 1.2717623967109035e-05, "loss": 2.8345354080200194, "step": 45160 }, { "epoch": 0.3646182284897847, "grad_norm": 0.9835812449455261, "learning_rate": 1.2716008497370825e-05, "loss": 3.028109550476074, "step": 45170 }, { "epoch": 0.3646989498155518, "grad_norm": 0.8664767742156982, "learning_rate": 1.271439302763261e-05, "loss": 2.700778770446777, "step": 45180 }, { "epoch": 0.3647796711413188, "grad_norm": 0.5240271091461182, "learning_rate": 1.2712777557894398e-05, "loss": 2.642278480529785, "step": 45190 }, { "epoch": 0.3648603924670859, "grad_norm": 0.6421008706092834, "learning_rate": 1.2711162088156184e-05, "loss": 3.022242546081543, "step": 45200 }, { "epoch": 0.36494111379285293, "grad_norm": 1.193332552909851, "learning_rate": 1.2709546618417972e-05, "loss": 3.1182069778442383, "step": 45210 }, { "epoch": 0.36502183511861996, "grad_norm": 0.8237985372543335, "learning_rate": 1.2707931148679758e-05, "loss": 3.1905263900756835, "step": 45220 }, { "epoch": 0.36510255644438705, "grad_norm": 0.7013501524925232, "learning_rate": 1.2706315678941546e-05, "loss": 3.1572723388671875, "step": 45230 }, { "epoch": 0.3651832777701541, "grad_norm": 0.7921153903007507, "learning_rate": 1.2704700209203332e-05, "loss": 3.210305404663086, "step": 45240 }, { "epoch": 0.36526399909592117, "grad_norm": 0.8225613832473755, "learning_rate": 1.270308473946512e-05, "loss": 2.92559871673584, "step": 45250 }, { "epoch": 0.3653447204216882, "grad_norm": 0.7388367056846619, "learning_rate": 1.2701469269726906e-05, "loss": 3.309256362915039, "step": 45260 }, { "epoch": 0.3654254417474553, "grad_norm": 0.8722954392433167, "learning_rate": 1.2699853799988693e-05, "loss": 3.0694456100463867, "step": 45270 }, { "epoch": 0.3655061630732223, "grad_norm": 0.7993379831314087, "learning_rate": 1.269823833025048e-05, "loss": 2.9672414779663088, "step": 45280 }, { "epoch": 0.36558688439898934, "grad_norm": 0.5859812498092651, "learning_rate": 1.2696622860512267e-05, "loss": 2.7679426193237306, "step": 45290 }, { "epoch": 0.36566760572475643, "grad_norm": 0.8762497305870056, "learning_rate": 1.2695007390774053e-05, "loss": 2.78665714263916, "step": 45300 }, { "epoch": 0.36574832705052346, "grad_norm": 1.2641139030456543, "learning_rate": 1.269339192103584e-05, "loss": 3.196644973754883, "step": 45310 }, { "epoch": 0.36582904837629054, "grad_norm": 0.8268104195594788, "learning_rate": 1.2691776451297627e-05, "loss": 2.8627687454223634, "step": 45320 }, { "epoch": 0.3659097697020576, "grad_norm": 0.6658362150192261, "learning_rate": 1.2690160981559414e-05, "loss": 3.337957000732422, "step": 45330 }, { "epoch": 0.36599049102782466, "grad_norm": 1.0445064306259155, "learning_rate": 1.26885455118212e-05, "loss": 3.0632888793945314, "step": 45340 }, { "epoch": 0.3660712123535917, "grad_norm": 1.6189801692962646, "learning_rate": 1.2686930042082988e-05, "loss": 2.7437976837158202, "step": 45350 }, { "epoch": 0.3661519336793588, "grad_norm": 0.7614535689353943, "learning_rate": 1.2685314572344774e-05, "loss": 2.9358760833740236, "step": 45360 }, { "epoch": 0.3662326550051258, "grad_norm": 1.3166828155517578, "learning_rate": 1.2683699102606562e-05, "loss": 3.039107322692871, "step": 45370 }, { "epoch": 0.36631337633089284, "grad_norm": 0.898235023021698, "learning_rate": 1.268208363286835e-05, "loss": 2.9595821380615233, "step": 45380 }, { "epoch": 0.3663940976566599, "grad_norm": 0.8091630339622498, "learning_rate": 1.2680468163130135e-05, "loss": 2.789471244812012, "step": 45390 }, { "epoch": 0.36647481898242695, "grad_norm": 1.1277450323104858, "learning_rate": 1.2678852693391923e-05, "loss": 3.1271434783935548, "step": 45400 }, { "epoch": 0.36655554030819404, "grad_norm": 1.091391921043396, "learning_rate": 1.2677237223653709e-05, "loss": 3.3641647338867187, "step": 45410 }, { "epoch": 0.36663626163396107, "grad_norm": 2.9030323028564453, "learning_rate": 1.2675621753915497e-05, "loss": 3.2415809631347656, "step": 45420 }, { "epoch": 0.36671698295972815, "grad_norm": 1.1297791004180908, "learning_rate": 1.2674006284177283e-05, "loss": 2.8755825042724608, "step": 45430 }, { "epoch": 0.3667977042854952, "grad_norm": 0.7659066319465637, "learning_rate": 1.267239081443907e-05, "loss": 2.7471797943115233, "step": 45440 }, { "epoch": 0.3668784256112622, "grad_norm": 0.999182403087616, "learning_rate": 1.2670775344700856e-05, "loss": 2.694715118408203, "step": 45450 }, { "epoch": 0.3669591469370293, "grad_norm": 0.6376758217811584, "learning_rate": 1.2669159874962644e-05, "loss": 2.7137374877929688, "step": 45460 }, { "epoch": 0.36703986826279633, "grad_norm": 0.8343366980552673, "learning_rate": 1.266754440522443e-05, "loss": 3.2376102447509765, "step": 45470 }, { "epoch": 0.3671205895885634, "grad_norm": 1.5832009315490723, "learning_rate": 1.2665928935486218e-05, "loss": 3.3362850189208983, "step": 45480 }, { "epoch": 0.36720131091433045, "grad_norm": 0.9725211262702942, "learning_rate": 1.2664313465748004e-05, "loss": 3.048047637939453, "step": 45490 }, { "epoch": 0.36728203224009753, "grad_norm": 0.9575350880622864, "learning_rate": 1.2662697996009792e-05, "loss": 2.9648876190185547, "step": 45500 }, { "epoch": 0.36736275356586456, "grad_norm": 1.0871995687484741, "learning_rate": 1.2661082526271578e-05, "loss": 3.021257019042969, "step": 45510 }, { "epoch": 0.3674434748916316, "grad_norm": 0.8049257397651672, "learning_rate": 1.2659467056533365e-05, "loss": 3.002367401123047, "step": 45520 }, { "epoch": 0.3675241962173987, "grad_norm": 1.102892518043518, "learning_rate": 1.2657851586795151e-05, "loss": 3.2848125457763673, "step": 45530 }, { "epoch": 0.3676049175431657, "grad_norm": 1.0859380960464478, "learning_rate": 1.2656236117056939e-05, "loss": 2.9270931243896485, "step": 45540 }, { "epoch": 0.3676856388689328, "grad_norm": 0.6672194600105286, "learning_rate": 1.2654620647318725e-05, "loss": 3.4012920379638674, "step": 45550 }, { "epoch": 0.3677663601946998, "grad_norm": 0.9035142064094543, "learning_rate": 1.2653005177580513e-05, "loss": 2.6893259048461915, "step": 45560 }, { "epoch": 0.3678470815204669, "grad_norm": 0.9069554805755615, "learning_rate": 1.2651389707842299e-05, "loss": 2.815568733215332, "step": 45570 }, { "epoch": 0.36792780284623394, "grad_norm": 0.8477001190185547, "learning_rate": 1.2649774238104086e-05, "loss": 2.569575309753418, "step": 45580 }, { "epoch": 0.36800852417200103, "grad_norm": 0.6254304051399231, "learning_rate": 1.2648158768365872e-05, "loss": 2.834261894226074, "step": 45590 }, { "epoch": 0.36808924549776806, "grad_norm": 0.8124837875366211, "learning_rate": 1.264654329862766e-05, "loss": 3.060613250732422, "step": 45600 }, { "epoch": 0.3681699668235351, "grad_norm": 0.6842703223228455, "learning_rate": 1.2644927828889446e-05, "loss": 2.9389625549316407, "step": 45610 }, { "epoch": 0.3682506881493022, "grad_norm": 0.9001117944717407, "learning_rate": 1.2643312359151234e-05, "loss": 2.713246726989746, "step": 45620 }, { "epoch": 0.3683314094750692, "grad_norm": 0.6509536504745483, "learning_rate": 1.264169688941302e-05, "loss": 3.215620422363281, "step": 45630 }, { "epoch": 0.3684121308008363, "grad_norm": 0.7734279632568359, "learning_rate": 1.2640081419674807e-05, "loss": 3.2061351776123046, "step": 45640 }, { "epoch": 0.3684928521266033, "grad_norm": 1.0508869886398315, "learning_rate": 1.2638465949936593e-05, "loss": 2.4889009475708006, "step": 45650 }, { "epoch": 0.3685735734523704, "grad_norm": 0.6440957188606262, "learning_rate": 1.2636850480198381e-05, "loss": 2.8654720306396486, "step": 45660 }, { "epoch": 0.36865429477813744, "grad_norm": 0.9157008528709412, "learning_rate": 1.2635235010460167e-05, "loss": 3.136572074890137, "step": 45670 }, { "epoch": 0.36873501610390447, "grad_norm": 0.7936660051345825, "learning_rate": 1.2633619540721955e-05, "loss": 2.9160789489746093, "step": 45680 }, { "epoch": 0.36881573742967155, "grad_norm": 0.9760968089103699, "learning_rate": 1.263200407098374e-05, "loss": 3.4925689697265625, "step": 45690 }, { "epoch": 0.3688964587554386, "grad_norm": 1.0021411180496216, "learning_rate": 1.2630388601245528e-05, "loss": 2.679706001281738, "step": 45700 }, { "epoch": 0.36897718008120567, "grad_norm": 0.8289865851402283, "learning_rate": 1.2628773131507314e-05, "loss": 2.96848087310791, "step": 45710 }, { "epoch": 0.3690579014069727, "grad_norm": 0.49772322177886963, "learning_rate": 1.2627157661769102e-05, "loss": 3.1707942962646483, "step": 45720 }, { "epoch": 0.3691386227327398, "grad_norm": 0.8915542364120483, "learning_rate": 1.2625542192030888e-05, "loss": 3.0230079650878907, "step": 45730 }, { "epoch": 0.3692193440585068, "grad_norm": 0.9236674308776855, "learning_rate": 1.2623926722292676e-05, "loss": 3.448159408569336, "step": 45740 }, { "epoch": 0.36930006538427385, "grad_norm": 0.997684895992279, "learning_rate": 1.2622311252554462e-05, "loss": 3.5639835357666017, "step": 45750 }, { "epoch": 0.36938078671004093, "grad_norm": 1.6949166059494019, "learning_rate": 1.262069578281625e-05, "loss": 2.941854476928711, "step": 45760 }, { "epoch": 0.36946150803580796, "grad_norm": 1.5867968797683716, "learning_rate": 1.2619080313078036e-05, "loss": 2.874491310119629, "step": 45770 }, { "epoch": 0.36954222936157505, "grad_norm": 0.9699234366416931, "learning_rate": 1.2617464843339823e-05, "loss": 2.85833683013916, "step": 45780 }, { "epoch": 0.3696229506873421, "grad_norm": 0.9420885443687439, "learning_rate": 1.261584937360161e-05, "loss": 2.7055147171020506, "step": 45790 }, { "epoch": 0.36970367201310916, "grad_norm": 0.8747164607048035, "learning_rate": 1.2614233903863397e-05, "loss": 3.023986053466797, "step": 45800 }, { "epoch": 0.3697843933388762, "grad_norm": 1.2479254007339478, "learning_rate": 1.2612618434125183e-05, "loss": 2.811206245422363, "step": 45810 }, { "epoch": 0.3698651146646433, "grad_norm": 1.169134497642517, "learning_rate": 1.261100296438697e-05, "loss": 2.5587339401245117, "step": 45820 }, { "epoch": 0.3699458359904103, "grad_norm": 0.9847345948219299, "learning_rate": 1.2609387494648757e-05, "loss": 2.957868766784668, "step": 45830 }, { "epoch": 0.37002655731617734, "grad_norm": 1.1772710084915161, "learning_rate": 1.2607772024910544e-05, "loss": 3.0595048904418944, "step": 45840 }, { "epoch": 0.3701072786419444, "grad_norm": 0.8720998167991638, "learning_rate": 1.260615655517233e-05, "loss": 2.9642898559570314, "step": 45850 }, { "epoch": 0.37018799996771146, "grad_norm": 1.2697110176086426, "learning_rate": 1.2604541085434118e-05, "loss": 2.849443054199219, "step": 45860 }, { "epoch": 0.37026872129347854, "grad_norm": 0.9680643677711487, "learning_rate": 1.2602925615695904e-05, "loss": 2.6865062713623047, "step": 45870 }, { "epoch": 0.3703494426192456, "grad_norm": 0.9196120500564575, "learning_rate": 1.2601310145957693e-05, "loss": 3.349710464477539, "step": 45880 }, { "epoch": 0.37043016394501266, "grad_norm": 1.053734302520752, "learning_rate": 1.2599694676219478e-05, "loss": 2.9399896621704102, "step": 45890 }, { "epoch": 0.3705108852707797, "grad_norm": 0.7548711895942688, "learning_rate": 1.2598079206481267e-05, "loss": 2.5879554748535156, "step": 45900 }, { "epoch": 0.3705916065965467, "grad_norm": 1.059587836265564, "learning_rate": 1.2596463736743051e-05, "loss": 3.0652849197387697, "step": 45910 }, { "epoch": 0.3706723279223138, "grad_norm": 0.9292479753494263, "learning_rate": 1.259484826700484e-05, "loss": 2.840526008605957, "step": 45920 }, { "epoch": 0.37075304924808083, "grad_norm": 0.6990494728088379, "learning_rate": 1.2593232797266625e-05, "loss": 3.002148246765137, "step": 45930 }, { "epoch": 0.3708337705738479, "grad_norm": 1.018879771232605, "learning_rate": 1.2591617327528414e-05, "loss": 2.9634347915649415, "step": 45940 }, { "epoch": 0.37091449189961495, "grad_norm": 0.6657718420028687, "learning_rate": 1.2590001857790199e-05, "loss": 3.3957714080810546, "step": 45950 }, { "epoch": 0.37099521322538204, "grad_norm": 1.1859129667282104, "learning_rate": 1.2588386388051988e-05, "loss": 3.077602005004883, "step": 45960 }, { "epoch": 0.37107593455114907, "grad_norm": 1.143710970878601, "learning_rate": 1.2586770918313772e-05, "loss": 3.120773506164551, "step": 45970 }, { "epoch": 0.3711566558769161, "grad_norm": 1.3003122806549072, "learning_rate": 1.2585155448575562e-05, "loss": 3.166499710083008, "step": 45980 }, { "epoch": 0.3712373772026832, "grad_norm": 0.7845035791397095, "learning_rate": 1.2583539978837346e-05, "loss": 3.2153903961181642, "step": 45990 }, { "epoch": 0.3713180985284502, "grad_norm": 1.4460225105285645, "learning_rate": 1.2581924509099136e-05, "loss": 3.370369720458984, "step": 46000 }, { "epoch": 0.3713988198542173, "grad_norm": 0.9355176091194153, "learning_rate": 1.258030903936092e-05, "loss": 2.551719856262207, "step": 46010 }, { "epoch": 0.37147954117998433, "grad_norm": 1.396776556968689, "learning_rate": 1.257869356962271e-05, "loss": 3.463530731201172, "step": 46020 }, { "epoch": 0.3715602625057514, "grad_norm": 1.6224687099456787, "learning_rate": 1.2577078099884494e-05, "loss": 2.729775810241699, "step": 46030 }, { "epoch": 0.37164098383151845, "grad_norm": 0.89911949634552, "learning_rate": 1.2575462630146283e-05, "loss": 2.4624277114868165, "step": 46040 }, { "epoch": 0.3717217051572855, "grad_norm": 0.7713939547538757, "learning_rate": 1.2573847160408069e-05, "loss": 3.15546875, "step": 46050 }, { "epoch": 0.37180242648305256, "grad_norm": 1.0479663610458374, "learning_rate": 1.2572231690669857e-05, "loss": 2.557583236694336, "step": 46060 }, { "epoch": 0.3718831478088196, "grad_norm": 1.1111177206039429, "learning_rate": 1.2570616220931643e-05, "loss": 2.723139190673828, "step": 46070 }, { "epoch": 0.3719638691345867, "grad_norm": 0.8229133486747742, "learning_rate": 1.256900075119343e-05, "loss": 2.7315496444702148, "step": 46080 }, { "epoch": 0.3720445904603537, "grad_norm": 1.2350417375564575, "learning_rate": 1.2567385281455216e-05, "loss": 2.8535322189331054, "step": 46090 }, { "epoch": 0.3721253117861208, "grad_norm": 0.7544927000999451, "learning_rate": 1.2565769811717004e-05, "loss": 3.10247859954834, "step": 46100 }, { "epoch": 0.3722060331118878, "grad_norm": 0.591679573059082, "learning_rate": 1.256415434197879e-05, "loss": 3.055373191833496, "step": 46110 }, { "epoch": 0.3722867544376549, "grad_norm": 1.0492713451385498, "learning_rate": 1.2562538872240578e-05, "loss": 2.8812381744384767, "step": 46120 }, { "epoch": 0.37236747576342194, "grad_norm": 0.9227235317230225, "learning_rate": 1.2560923402502364e-05, "loss": 3.018810272216797, "step": 46130 }, { "epoch": 0.37244819708918897, "grad_norm": 0.7706133723258972, "learning_rate": 1.2559307932764151e-05, "loss": 2.501203918457031, "step": 46140 }, { "epoch": 0.37252891841495606, "grad_norm": 0.80357825756073, "learning_rate": 1.2557692463025937e-05, "loss": 2.804661750793457, "step": 46150 }, { "epoch": 0.3726096397407231, "grad_norm": 1.418614149093628, "learning_rate": 1.2556076993287725e-05, "loss": 2.681643486022949, "step": 46160 }, { "epoch": 0.3726903610664902, "grad_norm": 0.9215155839920044, "learning_rate": 1.2554461523549511e-05, "loss": 2.667922782897949, "step": 46170 }, { "epoch": 0.3727710823922572, "grad_norm": 0.9285188317298889, "learning_rate": 1.2552846053811299e-05, "loss": 2.824852180480957, "step": 46180 }, { "epoch": 0.3728518037180243, "grad_norm": 0.6777017712593079, "learning_rate": 1.2551230584073085e-05, "loss": 2.821282958984375, "step": 46190 }, { "epoch": 0.3729325250437913, "grad_norm": 0.838093101978302, "learning_rate": 1.2549615114334872e-05, "loss": 3.1976356506347656, "step": 46200 }, { "epoch": 0.37301324636955835, "grad_norm": 0.8141591548919678, "learning_rate": 1.2547999644596658e-05, "loss": 3.0159664154052734, "step": 46210 }, { "epoch": 0.37309396769532543, "grad_norm": 0.9851174354553223, "learning_rate": 1.2546384174858446e-05, "loss": 2.900499153137207, "step": 46220 }, { "epoch": 0.37317468902109246, "grad_norm": 0.6303122043609619, "learning_rate": 1.2544768705120232e-05, "loss": 2.6647891998291016, "step": 46230 }, { "epoch": 0.37325541034685955, "grad_norm": 0.8631957769393921, "learning_rate": 1.254315323538202e-05, "loss": 2.8616659164428713, "step": 46240 }, { "epoch": 0.3733361316726266, "grad_norm": 0.8735802173614502, "learning_rate": 1.2541537765643806e-05, "loss": 2.762269401550293, "step": 46250 }, { "epoch": 0.37341685299839367, "grad_norm": 1.020315170288086, "learning_rate": 1.2539922295905594e-05, "loss": 2.7220813751220705, "step": 46260 }, { "epoch": 0.3734975743241607, "grad_norm": 1.121687889099121, "learning_rate": 1.253830682616738e-05, "loss": 3.438340759277344, "step": 46270 }, { "epoch": 0.3735782956499277, "grad_norm": 0.5952447056770325, "learning_rate": 1.2536691356429167e-05, "loss": 2.5243263244628906, "step": 46280 }, { "epoch": 0.3736590169756948, "grad_norm": 0.9961744546890259, "learning_rate": 1.2535075886690953e-05, "loss": 3.3484493255615235, "step": 46290 }, { "epoch": 0.37373973830146184, "grad_norm": 1.0231105089187622, "learning_rate": 1.2533460416952741e-05, "loss": 2.627247619628906, "step": 46300 }, { "epoch": 0.37382045962722893, "grad_norm": 0.7146442532539368, "learning_rate": 1.2531844947214527e-05, "loss": 2.831546401977539, "step": 46310 }, { "epoch": 0.37390118095299596, "grad_norm": 0.8136728405952454, "learning_rate": 1.2530229477476315e-05, "loss": 2.7759271621704102, "step": 46320 }, { "epoch": 0.37398190227876305, "grad_norm": 0.7924312353134155, "learning_rate": 1.25286140077381e-05, "loss": 2.8324962615966798, "step": 46330 }, { "epoch": 0.3740626236045301, "grad_norm": 0.8996555209159851, "learning_rate": 1.2526998537999888e-05, "loss": 2.838986396789551, "step": 46340 }, { "epoch": 0.37414334493029716, "grad_norm": 0.8363080024719238, "learning_rate": 1.2525383068261674e-05, "loss": 2.998134231567383, "step": 46350 }, { "epoch": 0.3742240662560642, "grad_norm": 0.8192769289016724, "learning_rate": 1.2523767598523462e-05, "loss": 3.1309080123901367, "step": 46360 }, { "epoch": 0.3743047875818312, "grad_norm": 1.2023046016693115, "learning_rate": 1.2522152128785248e-05, "loss": 3.0724010467529297, "step": 46370 }, { "epoch": 0.3743855089075983, "grad_norm": 0.8552799820899963, "learning_rate": 1.2520536659047036e-05, "loss": 2.8567508697509765, "step": 46380 }, { "epoch": 0.37446623023336534, "grad_norm": 1.062208890914917, "learning_rate": 1.2518921189308822e-05, "loss": 3.0592744827270506, "step": 46390 }, { "epoch": 0.3745469515591324, "grad_norm": 1.2000499963760376, "learning_rate": 1.251730571957061e-05, "loss": 2.797205924987793, "step": 46400 }, { "epoch": 0.37462767288489945, "grad_norm": 1.1885490417480469, "learning_rate": 1.2515690249832395e-05, "loss": 2.9565385818481444, "step": 46410 }, { "epoch": 0.37470839421066654, "grad_norm": 0.6347295045852661, "learning_rate": 1.2514074780094183e-05, "loss": 3.213506317138672, "step": 46420 }, { "epoch": 0.37478911553643357, "grad_norm": 0.920385479927063, "learning_rate": 1.2512459310355969e-05, "loss": 2.548351860046387, "step": 46430 }, { "epoch": 0.3748698368622006, "grad_norm": 0.9758346676826477, "learning_rate": 1.2510843840617757e-05, "loss": 2.7978492736816407, "step": 46440 }, { "epoch": 0.3749505581879677, "grad_norm": 0.9134461283683777, "learning_rate": 1.2509228370879543e-05, "loss": 3.0275367736816405, "step": 46450 }, { "epoch": 0.3750312795137347, "grad_norm": 0.9960325360298157, "learning_rate": 1.250761290114133e-05, "loss": 2.7754337310791017, "step": 46460 }, { "epoch": 0.3751120008395018, "grad_norm": 0.8385682702064514, "learning_rate": 1.2505997431403116e-05, "loss": 2.8973546981811524, "step": 46470 }, { "epoch": 0.37519272216526883, "grad_norm": 1.1321449279785156, "learning_rate": 1.2504381961664904e-05, "loss": 2.9842609405517577, "step": 46480 }, { "epoch": 0.3752734434910359, "grad_norm": 1.150481939315796, "learning_rate": 1.250276649192669e-05, "loss": 2.9047346115112305, "step": 46490 }, { "epoch": 0.37535416481680295, "grad_norm": 0.8611885905265808, "learning_rate": 1.2501151022188478e-05, "loss": 2.849310874938965, "step": 46500 }, { "epoch": 0.37543488614257, "grad_norm": 0.7925350069999695, "learning_rate": 1.2499535552450264e-05, "loss": 3.0640193939208986, "step": 46510 }, { "epoch": 0.37551560746833706, "grad_norm": 1.395530104637146, "learning_rate": 1.2497920082712051e-05, "loss": 3.025289535522461, "step": 46520 }, { "epoch": 0.3755963287941041, "grad_norm": 0.9128088355064392, "learning_rate": 1.2496304612973837e-05, "loss": 3.065989685058594, "step": 46530 }, { "epoch": 0.3756770501198712, "grad_norm": 1.7148782014846802, "learning_rate": 1.2494689143235625e-05, "loss": 2.9451284408569336, "step": 46540 }, { "epoch": 0.3757577714456382, "grad_norm": 2.0963032245635986, "learning_rate": 1.2493073673497411e-05, "loss": 2.484798812866211, "step": 46550 }, { "epoch": 0.3758384927714053, "grad_norm": 1.1802562475204468, "learning_rate": 1.2491458203759199e-05, "loss": 2.5715566635131837, "step": 46560 }, { "epoch": 0.3759192140971723, "grad_norm": 0.8496401309967041, "learning_rate": 1.2489842734020985e-05, "loss": 2.992620849609375, "step": 46570 }, { "epoch": 0.3759999354229394, "grad_norm": 0.7906182408332825, "learning_rate": 1.2488227264282773e-05, "loss": 3.0343164443969726, "step": 46580 }, { "epoch": 0.37608065674870644, "grad_norm": 0.9693819880485535, "learning_rate": 1.2486611794544559e-05, "loss": 2.8613454818725588, "step": 46590 }, { "epoch": 0.3761613780744735, "grad_norm": 0.8471150994300842, "learning_rate": 1.2484996324806346e-05, "loss": 3.5664329528808594, "step": 46600 }, { "epoch": 0.37624209940024056, "grad_norm": 1.1574971675872803, "learning_rate": 1.2483380855068132e-05, "loss": 2.821019744873047, "step": 46610 }, { "epoch": 0.3763228207260076, "grad_norm": 0.8054177165031433, "learning_rate": 1.248176538532992e-05, "loss": 3.00640869140625, "step": 46620 }, { "epoch": 0.3764035420517747, "grad_norm": 0.6702529788017273, "learning_rate": 1.2480149915591708e-05, "loss": 2.9572410583496094, "step": 46630 }, { "epoch": 0.3764842633775417, "grad_norm": 0.7337834239006042, "learning_rate": 1.2478534445853494e-05, "loss": 3.027688217163086, "step": 46640 }, { "epoch": 0.3765649847033088, "grad_norm": 0.7574622631072998, "learning_rate": 1.2476918976115281e-05, "loss": 3.06188850402832, "step": 46650 }, { "epoch": 0.3766457060290758, "grad_norm": 1.0709055662155151, "learning_rate": 1.2475303506377067e-05, "loss": 2.8250717163085937, "step": 46660 }, { "epoch": 0.37672642735484285, "grad_norm": 0.7510419487953186, "learning_rate": 1.2473688036638855e-05, "loss": 2.701283836364746, "step": 46670 }, { "epoch": 0.37680714868060994, "grad_norm": 1.2038973569869995, "learning_rate": 1.2472072566900641e-05, "loss": 2.587484359741211, "step": 46680 }, { "epoch": 0.37688787000637697, "grad_norm": 0.854415774345398, "learning_rate": 1.2470457097162429e-05, "loss": 2.6600027084350586, "step": 46690 }, { "epoch": 0.37696859133214405, "grad_norm": 1.4098503589630127, "learning_rate": 1.2468841627424215e-05, "loss": 2.6170482635498047, "step": 46700 }, { "epoch": 0.3770493126579111, "grad_norm": 0.8311848044395447, "learning_rate": 1.2467226157686002e-05, "loss": 3.1337514877319337, "step": 46710 }, { "epoch": 0.37713003398367817, "grad_norm": 1.0882132053375244, "learning_rate": 1.2465610687947788e-05, "loss": 2.7742815017700195, "step": 46720 }, { "epoch": 0.3772107553094452, "grad_norm": 0.914594292640686, "learning_rate": 1.2463995218209576e-05, "loss": 2.7260032653808595, "step": 46730 }, { "epoch": 0.37729147663521223, "grad_norm": 0.9459596276283264, "learning_rate": 1.2462379748471362e-05, "loss": 3.212966537475586, "step": 46740 }, { "epoch": 0.3773721979609793, "grad_norm": 0.9937347769737244, "learning_rate": 1.2460764278733151e-05, "loss": 2.7451271057128905, "step": 46750 }, { "epoch": 0.37745291928674635, "grad_norm": 0.9984515309333801, "learning_rate": 1.2459148808994936e-05, "loss": 2.9022171020507814, "step": 46760 }, { "epoch": 0.37753364061251343, "grad_norm": 0.9376488924026489, "learning_rate": 1.2457533339256725e-05, "loss": 2.8339576721191406, "step": 46770 }, { "epoch": 0.37761436193828046, "grad_norm": 1.0963842868804932, "learning_rate": 1.245591786951851e-05, "loss": 2.589227294921875, "step": 46780 }, { "epoch": 0.37769508326404755, "grad_norm": 1.1755772829055786, "learning_rate": 1.2454302399780299e-05, "loss": 3.3267974853515625, "step": 46790 }, { "epoch": 0.3777758045898146, "grad_norm": 1.7206079959869385, "learning_rate": 1.2452686930042083e-05, "loss": 3.0246118545532226, "step": 46800 }, { "epoch": 0.37785652591558166, "grad_norm": 1.0031582117080688, "learning_rate": 1.2451071460303873e-05, "loss": 3.2251800537109374, "step": 46810 }, { "epoch": 0.3779372472413487, "grad_norm": 0.7586411237716675, "learning_rate": 1.2449455990565657e-05, "loss": 2.8790462493896483, "step": 46820 }, { "epoch": 0.3780179685671157, "grad_norm": 1.1261807680130005, "learning_rate": 1.2447840520827446e-05, "loss": 3.2655971527099608, "step": 46830 }, { "epoch": 0.3780986898928828, "grad_norm": 0.7738911509513855, "learning_rate": 1.244622505108923e-05, "loss": 2.9384897232055662, "step": 46840 }, { "epoch": 0.37817941121864984, "grad_norm": 1.1277352571487427, "learning_rate": 1.244460958135102e-05, "loss": 2.626516342163086, "step": 46850 }, { "epoch": 0.3782601325444169, "grad_norm": 0.8572729229927063, "learning_rate": 1.2442994111612804e-05, "loss": 2.605287551879883, "step": 46860 }, { "epoch": 0.37834085387018396, "grad_norm": 1.0592036247253418, "learning_rate": 1.2441378641874594e-05, "loss": 2.9586341857910154, "step": 46870 }, { "epoch": 0.37842157519595104, "grad_norm": 0.8614901304244995, "learning_rate": 1.2439763172136378e-05, "loss": 2.813458251953125, "step": 46880 }, { "epoch": 0.3785022965217181, "grad_norm": 0.9816926717758179, "learning_rate": 1.2438147702398167e-05, "loss": 2.7199380874633787, "step": 46890 }, { "epoch": 0.3785830178474851, "grad_norm": 1.2123571634292603, "learning_rate": 1.2436532232659952e-05, "loss": 2.6220882415771483, "step": 46900 }, { "epoch": 0.3786637391732522, "grad_norm": 0.7211576700210571, "learning_rate": 1.2434916762921741e-05, "loss": 3.2155113220214844, "step": 46910 }, { "epoch": 0.3787444604990192, "grad_norm": 0.9459031820297241, "learning_rate": 1.2433301293183527e-05, "loss": 2.9094457626342773, "step": 46920 }, { "epoch": 0.3788251818247863, "grad_norm": 1.1937618255615234, "learning_rate": 1.2431685823445315e-05, "loss": 3.023975944519043, "step": 46930 }, { "epoch": 0.37890590315055334, "grad_norm": 0.854625940322876, "learning_rate": 1.24300703537071e-05, "loss": 2.8575363159179688, "step": 46940 }, { "epoch": 0.3789866244763204, "grad_norm": 0.836724579334259, "learning_rate": 1.2428454883968888e-05, "loss": 2.465157318115234, "step": 46950 }, { "epoch": 0.37906734580208745, "grad_norm": 1.0288811922073364, "learning_rate": 1.2426839414230674e-05, "loss": 3.1492868423461915, "step": 46960 }, { "epoch": 0.3791480671278545, "grad_norm": 0.9705510139465332, "learning_rate": 1.2425223944492462e-05, "loss": 3.1200809478759766, "step": 46970 }, { "epoch": 0.37922878845362157, "grad_norm": 0.9527958631515503, "learning_rate": 1.2423608474754248e-05, "loss": 2.7413509368896483, "step": 46980 }, { "epoch": 0.3793095097793886, "grad_norm": 0.8503009676933289, "learning_rate": 1.2421993005016036e-05, "loss": 2.897526741027832, "step": 46990 }, { "epoch": 0.3793902311051557, "grad_norm": 0.5930942296981812, "learning_rate": 1.2420377535277822e-05, "loss": 2.877070999145508, "step": 47000 }, { "epoch": 0.3794709524309227, "grad_norm": 0.8075498938560486, "learning_rate": 1.241876206553961e-05, "loss": 3.0134843826293944, "step": 47010 }, { "epoch": 0.3795516737566898, "grad_norm": 0.6215968728065491, "learning_rate": 1.2417146595801395e-05, "loss": 4.214923858642578, "step": 47020 }, { "epoch": 0.37963239508245683, "grad_norm": 1.1694509983062744, "learning_rate": 1.2415531126063183e-05, "loss": 3.0599668502807615, "step": 47030 }, { "epoch": 0.3797131164082239, "grad_norm": 1.0001037120819092, "learning_rate": 1.241391565632497e-05, "loss": 3.0490293502807617, "step": 47040 }, { "epoch": 0.37979383773399095, "grad_norm": 1.0187782049179077, "learning_rate": 1.2412300186586757e-05, "loss": 2.5208248138427733, "step": 47050 }, { "epoch": 0.379874559059758, "grad_norm": 1.0791194438934326, "learning_rate": 1.2410684716848543e-05, "loss": 2.9531814575195314, "step": 47060 }, { "epoch": 0.37995528038552506, "grad_norm": 0.937880277633667, "learning_rate": 1.240906924711033e-05, "loss": 2.3764625549316407, "step": 47070 }, { "epoch": 0.3800360017112921, "grad_norm": 0.7262189984321594, "learning_rate": 1.2407453777372117e-05, "loss": 3.1371694564819337, "step": 47080 }, { "epoch": 0.3801167230370592, "grad_norm": 1.320773720741272, "learning_rate": 1.2405838307633904e-05, "loss": 3.4100719451904298, "step": 47090 }, { "epoch": 0.3801974443628262, "grad_norm": 1.3152265548706055, "learning_rate": 1.240422283789569e-05, "loss": 2.7140493392944336, "step": 47100 }, { "epoch": 0.3802781656885933, "grad_norm": 0.743159294128418, "learning_rate": 1.2402607368157478e-05, "loss": 2.9119661331176756, "step": 47110 }, { "epoch": 0.3803588870143603, "grad_norm": 0.873814582824707, "learning_rate": 1.2400991898419264e-05, "loss": 4.015872573852539, "step": 47120 }, { "epoch": 0.38043960834012736, "grad_norm": 0.7891431450843811, "learning_rate": 1.2399376428681052e-05, "loss": 3.2083248138427733, "step": 47130 }, { "epoch": 0.38052032966589444, "grad_norm": 0.9264349937438965, "learning_rate": 1.2397760958942838e-05, "loss": 3.1855030059814453, "step": 47140 }, { "epoch": 0.38060105099166147, "grad_norm": 0.8431724905967712, "learning_rate": 1.2396145489204625e-05, "loss": 3.0318815231323244, "step": 47150 }, { "epoch": 0.38068177231742856, "grad_norm": 0.6352343559265137, "learning_rate": 1.2394530019466411e-05, "loss": 3.9208984375, "step": 47160 }, { "epoch": 0.3807624936431956, "grad_norm": 1.0243624448776245, "learning_rate": 1.2392914549728199e-05, "loss": 3.213580322265625, "step": 47170 }, { "epoch": 0.3808432149689627, "grad_norm": 0.8568543791770935, "learning_rate": 1.2391299079989985e-05, "loss": 3.2008460998535155, "step": 47180 }, { "epoch": 0.3809239362947297, "grad_norm": 1.3662270307540894, "learning_rate": 1.2389683610251773e-05, "loss": 2.697690010070801, "step": 47190 }, { "epoch": 0.38100465762049673, "grad_norm": 1.079795002937317, "learning_rate": 1.2388068140513559e-05, "loss": 2.849565887451172, "step": 47200 }, { "epoch": 0.3810853789462638, "grad_norm": 0.7403931617736816, "learning_rate": 1.2386452670775346e-05, "loss": 3.0878116607666017, "step": 47210 }, { "epoch": 0.38116610027203085, "grad_norm": 0.8826788663864136, "learning_rate": 1.2384837201037132e-05, "loss": 2.3836517333984375, "step": 47220 }, { "epoch": 0.38124682159779794, "grad_norm": 0.9818785786628723, "learning_rate": 1.238322173129892e-05, "loss": 2.8987850189208983, "step": 47230 }, { "epoch": 0.38132754292356497, "grad_norm": 0.8752692937850952, "learning_rate": 1.2381606261560706e-05, "loss": 3.0715641021728515, "step": 47240 }, { "epoch": 0.38140826424933205, "grad_norm": 0.5559041500091553, "learning_rate": 1.2379990791822494e-05, "loss": 2.6759143829345704, "step": 47250 }, { "epoch": 0.3814889855750991, "grad_norm": 0.9550179243087769, "learning_rate": 1.237837532208428e-05, "loss": 3.284856414794922, "step": 47260 }, { "epoch": 0.3815697069008661, "grad_norm": 1.2800034284591675, "learning_rate": 1.2376759852346067e-05, "loss": 3.073884201049805, "step": 47270 }, { "epoch": 0.3816504282266332, "grad_norm": 0.8316230177879333, "learning_rate": 1.2375144382607853e-05, "loss": 2.9255552291870117, "step": 47280 }, { "epoch": 0.38173114955240023, "grad_norm": 1.3708163499832153, "learning_rate": 1.2373528912869641e-05, "loss": 2.775917625427246, "step": 47290 }, { "epoch": 0.3818118708781673, "grad_norm": 0.8198966979980469, "learning_rate": 1.2371913443131427e-05, "loss": 2.753093719482422, "step": 47300 }, { "epoch": 0.38189259220393434, "grad_norm": 0.610937237739563, "learning_rate": 1.2370297973393215e-05, "loss": 3.0272674560546875, "step": 47310 }, { "epoch": 0.38197331352970143, "grad_norm": 1.3945447206497192, "learning_rate": 1.2368682503655e-05, "loss": 3.022772216796875, "step": 47320 }, { "epoch": 0.38205403485546846, "grad_norm": 0.8491544127464294, "learning_rate": 1.2367067033916789e-05, "loss": 3.1739738464355467, "step": 47330 }, { "epoch": 0.38213475618123555, "grad_norm": 0.6054435968399048, "learning_rate": 1.2365451564178575e-05, "loss": 3.129513740539551, "step": 47340 }, { "epoch": 0.3822154775070026, "grad_norm": 0.7021318078041077, "learning_rate": 1.2363836094440362e-05, "loss": 2.8682647705078126, "step": 47350 }, { "epoch": 0.3822961988327696, "grad_norm": 0.9482680559158325, "learning_rate": 1.2362220624702148e-05, "loss": 2.824729347229004, "step": 47360 }, { "epoch": 0.3823769201585367, "grad_norm": 0.8182543516159058, "learning_rate": 1.2360605154963936e-05, "loss": 3.044625663757324, "step": 47370 }, { "epoch": 0.3824576414843037, "grad_norm": 0.7207936644554138, "learning_rate": 1.2358989685225722e-05, "loss": 3.1199092864990234, "step": 47380 }, { "epoch": 0.3825383628100708, "grad_norm": 0.6466401815414429, "learning_rate": 1.235737421548751e-05, "loss": 3.020782470703125, "step": 47390 }, { "epoch": 0.38261908413583784, "grad_norm": 1.0727918148040771, "learning_rate": 1.2355758745749296e-05, "loss": 2.882776641845703, "step": 47400 }, { "epoch": 0.3826998054616049, "grad_norm": 1.266379952430725, "learning_rate": 1.2354143276011083e-05, "loss": 2.710860252380371, "step": 47410 }, { "epoch": 0.38278052678737196, "grad_norm": 0.8872500658035278, "learning_rate": 1.235252780627287e-05, "loss": 2.806624984741211, "step": 47420 }, { "epoch": 0.382861248113139, "grad_norm": 0.8636651039123535, "learning_rate": 1.2350912336534657e-05, "loss": 2.648725700378418, "step": 47430 }, { "epoch": 0.38294196943890607, "grad_norm": 0.9682127833366394, "learning_rate": 1.2349296866796443e-05, "loss": 2.6261539459228516, "step": 47440 }, { "epoch": 0.3830226907646731, "grad_norm": 1.0096735954284668, "learning_rate": 1.234768139705823e-05, "loss": 2.703182029724121, "step": 47450 }, { "epoch": 0.3831034120904402, "grad_norm": 0.9555923938751221, "learning_rate": 1.2346065927320017e-05, "loss": 2.9301036834716796, "step": 47460 }, { "epoch": 0.3831841334162072, "grad_norm": 1.5281039476394653, "learning_rate": 1.2344450457581804e-05, "loss": 3.4261795043945313, "step": 47470 }, { "epoch": 0.3832648547419743, "grad_norm": 0.7000711560249329, "learning_rate": 1.234283498784359e-05, "loss": 3.2468032836914062, "step": 47480 }, { "epoch": 0.38334557606774133, "grad_norm": 0.8766074776649475, "learning_rate": 1.2341219518105378e-05, "loss": 3.4677845001220704, "step": 47490 }, { "epoch": 0.38342629739350836, "grad_norm": 1.0115132331848145, "learning_rate": 1.2339604048367164e-05, "loss": 2.6316549301147463, "step": 47500 }, { "epoch": 0.38350701871927545, "grad_norm": 1.3353655338287354, "learning_rate": 1.2337988578628952e-05, "loss": 2.8668239593505858, "step": 47510 }, { "epoch": 0.3835877400450425, "grad_norm": 1.6430047750473022, "learning_rate": 1.2336373108890738e-05, "loss": 3.0093488693237305, "step": 47520 }, { "epoch": 0.38366846137080957, "grad_norm": 1.2400531768798828, "learning_rate": 1.2334757639152525e-05, "loss": 2.7705223083496096, "step": 47530 }, { "epoch": 0.3837491826965766, "grad_norm": 1.2485790252685547, "learning_rate": 1.2333142169414311e-05, "loss": 3.188701057434082, "step": 47540 }, { "epoch": 0.3838299040223437, "grad_norm": 1.0914461612701416, "learning_rate": 1.2331526699676099e-05, "loss": 2.9771860122680662, "step": 47550 }, { "epoch": 0.3839106253481107, "grad_norm": 0.9347277283668518, "learning_rate": 1.2329911229937885e-05, "loss": 2.76541748046875, "step": 47560 }, { "epoch": 0.3839913466738778, "grad_norm": 0.7092697620391846, "learning_rate": 1.2328295760199673e-05, "loss": 2.763652229309082, "step": 47570 }, { "epoch": 0.38407206799964483, "grad_norm": 1.476026177406311, "learning_rate": 1.2326680290461459e-05, "loss": 3.136066436767578, "step": 47580 }, { "epoch": 0.38415278932541186, "grad_norm": 1.2580766677856445, "learning_rate": 1.2325064820723247e-05, "loss": 2.6107450485229493, "step": 47590 }, { "epoch": 0.38423351065117894, "grad_norm": 1.0835704803466797, "learning_rate": 1.2323449350985033e-05, "loss": 2.7428049087524413, "step": 47600 }, { "epoch": 0.384314231976946, "grad_norm": 1.8239960670471191, "learning_rate": 1.232183388124682e-05, "loss": 3.059128189086914, "step": 47610 }, { "epoch": 0.38439495330271306, "grad_norm": 0.6753947138786316, "learning_rate": 1.2320218411508606e-05, "loss": 3.509891128540039, "step": 47620 }, { "epoch": 0.3844756746284801, "grad_norm": 1.615425705909729, "learning_rate": 1.2318602941770394e-05, "loss": 2.9727365493774416, "step": 47630 }, { "epoch": 0.3845563959542472, "grad_norm": 0.743382453918457, "learning_rate": 1.231698747203218e-05, "loss": 2.921060562133789, "step": 47640 }, { "epoch": 0.3846371172800142, "grad_norm": 1.191554069519043, "learning_rate": 1.2315372002293968e-05, "loss": 2.888483428955078, "step": 47650 }, { "epoch": 0.38471783860578124, "grad_norm": 1.5716586112976074, "learning_rate": 1.2313756532555754e-05, "loss": 2.9958255767822264, "step": 47660 }, { "epoch": 0.3847985599315483, "grad_norm": 1.0451058149337769, "learning_rate": 1.2312141062817541e-05, "loss": 3.31530876159668, "step": 47670 }, { "epoch": 0.38487928125731535, "grad_norm": 1.1822929382324219, "learning_rate": 1.2310525593079327e-05, "loss": 2.8486724853515626, "step": 47680 }, { "epoch": 0.38496000258308244, "grad_norm": 1.2961602210998535, "learning_rate": 1.2308910123341115e-05, "loss": 2.9213586807250977, "step": 47690 }, { "epoch": 0.38504072390884947, "grad_norm": 0.9467876553535461, "learning_rate": 1.2307294653602901e-05, "loss": 2.9013933181762694, "step": 47700 }, { "epoch": 0.38512144523461656, "grad_norm": 1.4286457300186157, "learning_rate": 1.2305679183864689e-05, "loss": 2.5823413848876955, "step": 47710 }, { "epoch": 0.3852021665603836, "grad_norm": 0.6639066338539124, "learning_rate": 1.2304063714126475e-05, "loss": 3.706145095825195, "step": 47720 }, { "epoch": 0.3852828878861506, "grad_norm": 0.5633189082145691, "learning_rate": 1.2302448244388262e-05, "loss": 2.798619842529297, "step": 47730 }, { "epoch": 0.3853636092119177, "grad_norm": 0.8409667015075684, "learning_rate": 1.2300832774650048e-05, "loss": 3.4645484924316405, "step": 47740 }, { "epoch": 0.38544433053768473, "grad_norm": 1.1324090957641602, "learning_rate": 1.2299217304911836e-05, "loss": 2.7548303604125977, "step": 47750 }, { "epoch": 0.3855250518634518, "grad_norm": 0.8040454983711243, "learning_rate": 1.2297601835173622e-05, "loss": 3.3390644073486326, "step": 47760 }, { "epoch": 0.38560577318921885, "grad_norm": 1.1031007766723633, "learning_rate": 1.229598636543541e-05, "loss": 2.8626413345336914, "step": 47770 }, { "epoch": 0.38568649451498593, "grad_norm": 1.3234913349151611, "learning_rate": 1.2294370895697196e-05, "loss": 2.7506330490112303, "step": 47780 }, { "epoch": 0.38576721584075296, "grad_norm": 1.1985982656478882, "learning_rate": 1.2292755425958985e-05, "loss": 2.7302696228027346, "step": 47790 }, { "epoch": 0.38584793716652005, "grad_norm": 0.8855620622634888, "learning_rate": 1.229113995622077e-05, "loss": 3.0601110458374023, "step": 47800 }, { "epoch": 0.3859286584922871, "grad_norm": 0.6825231313705444, "learning_rate": 1.2289524486482559e-05, "loss": 2.535282516479492, "step": 47810 }, { "epoch": 0.3860093798180541, "grad_norm": 0.6962991952896118, "learning_rate": 1.2287909016744343e-05, "loss": 2.7356311798095705, "step": 47820 }, { "epoch": 0.3860901011438212, "grad_norm": 1.1463146209716797, "learning_rate": 1.2286293547006133e-05, "loss": 2.6750364303588867, "step": 47830 }, { "epoch": 0.3861708224695882, "grad_norm": 1.316736102104187, "learning_rate": 1.2284678077267917e-05, "loss": 2.8558712005615234, "step": 47840 }, { "epoch": 0.3862515437953553, "grad_norm": 0.633232593536377, "learning_rate": 1.2283062607529706e-05, "loss": 2.96278076171875, "step": 47850 }, { "epoch": 0.38633226512112234, "grad_norm": 0.9789772629737854, "learning_rate": 1.228144713779149e-05, "loss": 2.741738128662109, "step": 47860 }, { "epoch": 0.38641298644688943, "grad_norm": 0.9454385638237, "learning_rate": 1.227983166805328e-05, "loss": 3.009893798828125, "step": 47870 }, { "epoch": 0.38649370777265646, "grad_norm": 0.9727265238761902, "learning_rate": 1.2278216198315068e-05, "loss": 2.8637195587158204, "step": 47880 }, { "epoch": 0.3865744290984235, "grad_norm": 1.049163579940796, "learning_rate": 1.2276600728576854e-05, "loss": 2.7526487350463866, "step": 47890 }, { "epoch": 0.3866551504241906, "grad_norm": 1.191753625869751, "learning_rate": 1.2274985258838641e-05, "loss": 2.541486358642578, "step": 47900 }, { "epoch": 0.3867358717499576, "grad_norm": 0.9798639416694641, "learning_rate": 1.2273369789100427e-05, "loss": 3.026133155822754, "step": 47910 }, { "epoch": 0.3868165930757247, "grad_norm": 0.9319261908531189, "learning_rate": 1.2271754319362215e-05, "loss": 2.6864866256713866, "step": 47920 }, { "epoch": 0.3868973144014917, "grad_norm": 1.1594939231872559, "learning_rate": 1.2270138849624001e-05, "loss": 3.1063764572143553, "step": 47930 }, { "epoch": 0.3869780357272588, "grad_norm": 1.4507704973220825, "learning_rate": 1.2268523379885789e-05, "loss": 2.721508598327637, "step": 47940 }, { "epoch": 0.38705875705302584, "grad_norm": 0.9952152371406555, "learning_rate": 1.2266907910147575e-05, "loss": 2.8317874908447265, "step": 47950 }, { "epoch": 0.38713947837879287, "grad_norm": 0.8443887233734131, "learning_rate": 1.2265292440409362e-05, "loss": 2.820231819152832, "step": 47960 }, { "epoch": 0.38722019970455995, "grad_norm": 1.0651921033859253, "learning_rate": 1.2263676970671148e-05, "loss": 2.5147966384887694, "step": 47970 }, { "epoch": 0.387300921030327, "grad_norm": 1.3477612733840942, "learning_rate": 1.2262061500932936e-05, "loss": 2.6499237060546874, "step": 47980 }, { "epoch": 0.38738164235609407, "grad_norm": 0.7550233602523804, "learning_rate": 1.2260446031194722e-05, "loss": 2.7586082458496093, "step": 47990 }, { "epoch": 0.3874623636818611, "grad_norm": 0.7844359874725342, "learning_rate": 1.225883056145651e-05, "loss": 3.030856895446777, "step": 48000 }, { "epoch": 0.3875430850076282, "grad_norm": 1.4362329244613647, "learning_rate": 1.2257215091718296e-05, "loss": 2.9631364822387694, "step": 48010 }, { "epoch": 0.3876238063333952, "grad_norm": 1.0421479940414429, "learning_rate": 1.2255599621980083e-05, "loss": 3.566497039794922, "step": 48020 }, { "epoch": 0.3877045276591623, "grad_norm": 0.6476061940193176, "learning_rate": 1.225398415224187e-05, "loss": 2.9120456695556642, "step": 48030 }, { "epoch": 0.38778524898492933, "grad_norm": 0.8814513683319092, "learning_rate": 1.2252368682503657e-05, "loss": 3.210671234130859, "step": 48040 }, { "epoch": 0.38786597031069636, "grad_norm": 1.3158081769943237, "learning_rate": 1.2250753212765443e-05, "loss": 2.9740154266357424, "step": 48050 }, { "epoch": 0.38794669163646345, "grad_norm": 0.7820774912834167, "learning_rate": 1.224913774302723e-05, "loss": 3.1066076278686525, "step": 48060 }, { "epoch": 0.3880274129622305, "grad_norm": 1.1822954416275024, "learning_rate": 1.2247522273289017e-05, "loss": 2.869019317626953, "step": 48070 }, { "epoch": 0.38810813428799756, "grad_norm": 0.8697738647460938, "learning_rate": 1.2245906803550805e-05, "loss": 2.8008071899414064, "step": 48080 }, { "epoch": 0.3881888556137646, "grad_norm": 0.9862592816352844, "learning_rate": 1.224429133381259e-05, "loss": 3.0594514846801757, "step": 48090 }, { "epoch": 0.3882695769395317, "grad_norm": 0.6038801074028015, "learning_rate": 1.2242675864074378e-05, "loss": 2.8575830459594727, "step": 48100 }, { "epoch": 0.3883502982652987, "grad_norm": 0.7707769870758057, "learning_rate": 1.2241060394336164e-05, "loss": 2.7306976318359375, "step": 48110 }, { "epoch": 0.38843101959106574, "grad_norm": 0.6166262626647949, "learning_rate": 1.2239444924597952e-05, "loss": 2.8559005737304686, "step": 48120 }, { "epoch": 0.3885117409168328, "grad_norm": 0.6350172162055969, "learning_rate": 1.2237829454859738e-05, "loss": 2.867905616760254, "step": 48130 }, { "epoch": 0.38859246224259986, "grad_norm": 1.1859039068222046, "learning_rate": 1.2236213985121526e-05, "loss": 2.768290138244629, "step": 48140 }, { "epoch": 0.38867318356836694, "grad_norm": 0.8830769658088684, "learning_rate": 1.2234598515383312e-05, "loss": 2.891567611694336, "step": 48150 }, { "epoch": 0.388753904894134, "grad_norm": 1.0405094623565674, "learning_rate": 1.22329830456451e-05, "loss": 3.0946680068969727, "step": 48160 }, { "epoch": 0.38883462621990106, "grad_norm": 1.2504926919937134, "learning_rate": 1.2231367575906885e-05, "loss": 2.717618942260742, "step": 48170 }, { "epoch": 0.3889153475456681, "grad_norm": 0.8538708686828613, "learning_rate": 1.2229752106168673e-05, "loss": 2.9514976501464845, "step": 48180 }, { "epoch": 0.3889960688714351, "grad_norm": 0.6235764026641846, "learning_rate": 1.2228136636430459e-05, "loss": 2.9380704879760744, "step": 48190 }, { "epoch": 0.3890767901972022, "grad_norm": 0.9378325343132019, "learning_rate": 1.2226521166692247e-05, "loss": 2.226367950439453, "step": 48200 }, { "epoch": 0.38915751152296923, "grad_norm": 1.0231237411499023, "learning_rate": 1.2224905696954033e-05, "loss": 2.867420959472656, "step": 48210 }, { "epoch": 0.3892382328487363, "grad_norm": 0.7190034985542297, "learning_rate": 1.222329022721582e-05, "loss": 2.5865476608276365, "step": 48220 }, { "epoch": 0.38931895417450335, "grad_norm": 1.0792489051818848, "learning_rate": 1.2221674757477606e-05, "loss": 3.3266490936279296, "step": 48230 }, { "epoch": 0.38939967550027044, "grad_norm": 1.0612493753433228, "learning_rate": 1.2220059287739394e-05, "loss": 2.6311792373657226, "step": 48240 }, { "epoch": 0.38948039682603747, "grad_norm": 0.7603699564933777, "learning_rate": 1.221844381800118e-05, "loss": 2.8269107818603514, "step": 48250 }, { "epoch": 0.38956111815180455, "grad_norm": 1.6484962701797485, "learning_rate": 1.2216828348262968e-05, "loss": 3.054953765869141, "step": 48260 }, { "epoch": 0.3896418394775716, "grad_norm": 0.9822835326194763, "learning_rate": 1.2215212878524754e-05, "loss": 2.6394514083862304, "step": 48270 }, { "epoch": 0.3897225608033386, "grad_norm": 1.1463310718536377, "learning_rate": 1.2213597408786541e-05, "loss": 2.5466686248779298, "step": 48280 }, { "epoch": 0.3898032821291057, "grad_norm": 0.8880779147148132, "learning_rate": 1.2211981939048327e-05, "loss": 2.605620193481445, "step": 48290 }, { "epoch": 0.38988400345487273, "grad_norm": 0.8029316663742065, "learning_rate": 1.2210366469310115e-05, "loss": 3.0590681076049804, "step": 48300 }, { "epoch": 0.3899647247806398, "grad_norm": 1.6987276077270508, "learning_rate": 1.2208750999571901e-05, "loss": 2.632619285583496, "step": 48310 }, { "epoch": 0.39004544610640685, "grad_norm": 0.7615813612937927, "learning_rate": 1.2207135529833689e-05, "loss": 2.909958076477051, "step": 48320 }, { "epoch": 0.39012616743217393, "grad_norm": 0.6021737456321716, "learning_rate": 1.2205520060095475e-05, "loss": 2.906085395812988, "step": 48330 }, { "epoch": 0.39020688875794096, "grad_norm": 1.2530908584594727, "learning_rate": 1.2203904590357262e-05, "loss": 2.948188591003418, "step": 48340 }, { "epoch": 0.390287610083708, "grad_norm": 1.259090781211853, "learning_rate": 1.2202289120619048e-05, "loss": 3.368946838378906, "step": 48350 }, { "epoch": 0.3903683314094751, "grad_norm": 1.1782184839248657, "learning_rate": 1.2200673650880836e-05, "loss": 3.0291955947875975, "step": 48360 }, { "epoch": 0.3904490527352421, "grad_norm": 1.3717278242111206, "learning_rate": 1.2199058181142622e-05, "loss": 3.487423324584961, "step": 48370 }, { "epoch": 0.3905297740610092, "grad_norm": 0.6171412467956543, "learning_rate": 1.219744271140441e-05, "loss": 3.1708602905273438, "step": 48380 }, { "epoch": 0.3906104953867762, "grad_norm": 1.149707317352295, "learning_rate": 1.2195827241666196e-05, "loss": 2.729377555847168, "step": 48390 }, { "epoch": 0.3906912167125433, "grad_norm": 1.1604657173156738, "learning_rate": 1.2194211771927984e-05, "loss": 3.350962829589844, "step": 48400 }, { "epoch": 0.39077193803831034, "grad_norm": 0.9178034663200378, "learning_rate": 1.219259630218977e-05, "loss": 2.9726863861083985, "step": 48410 }, { "epoch": 0.39085265936407737, "grad_norm": 0.7952579259872437, "learning_rate": 1.2190980832451557e-05, "loss": 3.3575225830078126, "step": 48420 }, { "epoch": 0.39093338068984446, "grad_norm": 0.5952125191688538, "learning_rate": 1.2189365362713343e-05, "loss": 3.4021419525146483, "step": 48430 }, { "epoch": 0.3910141020156115, "grad_norm": 0.9210447072982788, "learning_rate": 1.2187749892975131e-05, "loss": 3.1276147842407225, "step": 48440 }, { "epoch": 0.3910948233413786, "grad_norm": 1.321115493774414, "learning_rate": 1.2186134423236917e-05, "loss": 2.8660568237304687, "step": 48450 }, { "epoch": 0.3911755446671456, "grad_norm": 1.3022524118423462, "learning_rate": 1.2184518953498705e-05, "loss": 3.791024017333984, "step": 48460 }, { "epoch": 0.3912562659929127, "grad_norm": 1.0297662019729614, "learning_rate": 1.218290348376049e-05, "loss": 2.858235168457031, "step": 48470 }, { "epoch": 0.3913369873186797, "grad_norm": 1.4921468496322632, "learning_rate": 1.2181288014022278e-05, "loss": 3.0096837997436525, "step": 48480 }, { "epoch": 0.39141770864444675, "grad_norm": 0.9265198707580566, "learning_rate": 1.2179672544284064e-05, "loss": 2.5324947357177736, "step": 48490 }, { "epoch": 0.39149842997021383, "grad_norm": 1.3373401165008545, "learning_rate": 1.2178057074545852e-05, "loss": 3.012052536010742, "step": 48500 }, { "epoch": 0.39157915129598087, "grad_norm": 0.9449930787086487, "learning_rate": 1.2176441604807638e-05, "loss": 3.139664649963379, "step": 48510 }, { "epoch": 0.39165987262174795, "grad_norm": 0.6547330617904663, "learning_rate": 1.2174826135069426e-05, "loss": 2.9309612274169923, "step": 48520 }, { "epoch": 0.391740593947515, "grad_norm": 1.0311530828475952, "learning_rate": 1.2173210665331212e-05, "loss": 2.685470199584961, "step": 48530 }, { "epoch": 0.39182131527328207, "grad_norm": 0.8619706034660339, "learning_rate": 1.2171595195593e-05, "loss": 2.883234977722168, "step": 48540 }, { "epoch": 0.3919020365990491, "grad_norm": 0.810550332069397, "learning_rate": 1.2169979725854785e-05, "loss": 4.089762878417969, "step": 48550 }, { "epoch": 0.3919827579248162, "grad_norm": 0.7329712510108948, "learning_rate": 1.2168364256116573e-05, "loss": 3.049043083190918, "step": 48560 }, { "epoch": 0.3920634792505832, "grad_norm": 0.9591856598854065, "learning_rate": 1.2166748786378359e-05, "loss": 2.7566755294799803, "step": 48570 }, { "epoch": 0.39214420057635024, "grad_norm": 0.5421905517578125, "learning_rate": 1.2165133316640147e-05, "loss": 2.425794792175293, "step": 48580 }, { "epoch": 0.39222492190211733, "grad_norm": 1.3857017755508423, "learning_rate": 1.2163517846901933e-05, "loss": 2.8202560424804686, "step": 48590 }, { "epoch": 0.39230564322788436, "grad_norm": 0.8444117903709412, "learning_rate": 1.216190237716372e-05, "loss": 3.0109460830688475, "step": 48600 }, { "epoch": 0.39238636455365145, "grad_norm": 1.4101868867874146, "learning_rate": 1.2160286907425506e-05, "loss": 2.935564422607422, "step": 48610 }, { "epoch": 0.3924670858794185, "grad_norm": 1.041896104812622, "learning_rate": 1.2158671437687294e-05, "loss": 2.7979591369628904, "step": 48620 }, { "epoch": 0.39254780720518556, "grad_norm": 1.171077847480774, "learning_rate": 1.215705596794908e-05, "loss": 2.725766181945801, "step": 48630 }, { "epoch": 0.3926285285309526, "grad_norm": 0.8296238780021667, "learning_rate": 1.2155440498210868e-05, "loss": 2.6682920455932617, "step": 48640 }, { "epoch": 0.3927092498567196, "grad_norm": 1.2417398691177368, "learning_rate": 1.2153825028472654e-05, "loss": 2.545985794067383, "step": 48650 }, { "epoch": 0.3927899711824867, "grad_norm": 0.7916917204856873, "learning_rate": 1.2152209558734443e-05, "loss": 2.8330547332763674, "step": 48660 }, { "epoch": 0.39287069250825374, "grad_norm": 0.6142646670341492, "learning_rate": 1.2150594088996228e-05, "loss": 3.3927051544189455, "step": 48670 }, { "epoch": 0.3929514138340208, "grad_norm": 0.9654515981674194, "learning_rate": 1.2148978619258017e-05, "loss": 2.706458854675293, "step": 48680 }, { "epoch": 0.39303213515978785, "grad_norm": 1.2159541845321655, "learning_rate": 1.2147363149519801e-05, "loss": 2.751348114013672, "step": 48690 }, { "epoch": 0.39311285648555494, "grad_norm": 0.5995701551437378, "learning_rate": 1.214574767978159e-05, "loss": 2.7572296142578123, "step": 48700 }, { "epoch": 0.39319357781132197, "grad_norm": 0.828018069267273, "learning_rate": 1.2144132210043375e-05, "loss": 2.8221736907958985, "step": 48710 }, { "epoch": 0.393274299137089, "grad_norm": 0.8309490084648132, "learning_rate": 1.2142516740305164e-05, "loss": 2.7478965759277343, "step": 48720 }, { "epoch": 0.3933550204628561, "grad_norm": 0.7546589970588684, "learning_rate": 1.2140901270566949e-05, "loss": 2.9549228668212892, "step": 48730 }, { "epoch": 0.3934357417886231, "grad_norm": 0.6513755917549133, "learning_rate": 1.2139285800828738e-05, "loss": 2.888579177856445, "step": 48740 }, { "epoch": 0.3935164631143902, "grad_norm": 1.552346110343933, "learning_rate": 1.2137670331090522e-05, "loss": 2.495862579345703, "step": 48750 }, { "epoch": 0.39359718444015723, "grad_norm": 1.0551855564117432, "learning_rate": 1.2136054861352312e-05, "loss": 2.4139387130737306, "step": 48760 }, { "epoch": 0.3936779057659243, "grad_norm": 1.4496147632598877, "learning_rate": 1.2134439391614096e-05, "loss": 3.0685752868652343, "step": 48770 }, { "epoch": 0.39375862709169135, "grad_norm": 2.1944031715393066, "learning_rate": 1.2132823921875885e-05, "loss": 2.754551315307617, "step": 48780 }, { "epoch": 0.39383934841745843, "grad_norm": 1.3889626264572144, "learning_rate": 1.213120845213767e-05, "loss": 3.3824954986572267, "step": 48790 }, { "epoch": 0.39392006974322546, "grad_norm": 1.8196226358413696, "learning_rate": 1.2129592982399459e-05, "loss": 3.2839515686035154, "step": 48800 }, { "epoch": 0.3940007910689925, "grad_norm": 0.7807741761207581, "learning_rate": 1.2127977512661243e-05, "loss": 2.7734821319580076, "step": 48810 }, { "epoch": 0.3940815123947596, "grad_norm": 0.7154921889305115, "learning_rate": 1.2126362042923033e-05, "loss": 2.7210052490234373, "step": 48820 }, { "epoch": 0.3941622337205266, "grad_norm": 1.1172980070114136, "learning_rate": 1.2124746573184819e-05, "loss": 3.3961891174316405, "step": 48830 }, { "epoch": 0.3942429550462937, "grad_norm": 1.4971933364868164, "learning_rate": 1.2123131103446606e-05, "loss": 2.6561716079711912, "step": 48840 }, { "epoch": 0.3943236763720607, "grad_norm": 0.9437954425811768, "learning_rate": 1.2121515633708392e-05, "loss": 2.5644540786743164, "step": 48850 }, { "epoch": 0.3944043976978278, "grad_norm": 1.0659286975860596, "learning_rate": 1.211990016397018e-05, "loss": 3.089900779724121, "step": 48860 }, { "epoch": 0.39448511902359484, "grad_norm": 0.9045607447624207, "learning_rate": 1.2118284694231966e-05, "loss": 3.33399658203125, "step": 48870 }, { "epoch": 0.3945658403493619, "grad_norm": 0.7608701586723328, "learning_rate": 1.2116669224493754e-05, "loss": 3.2103012084960936, "step": 48880 }, { "epoch": 0.39464656167512896, "grad_norm": 0.6239421963691711, "learning_rate": 1.211505375475554e-05, "loss": 2.783835029602051, "step": 48890 }, { "epoch": 0.394727283000896, "grad_norm": 1.5069830417633057, "learning_rate": 1.2113438285017328e-05, "loss": 2.470273017883301, "step": 48900 }, { "epoch": 0.3948080043266631, "grad_norm": 0.9472894668579102, "learning_rate": 1.2111822815279114e-05, "loss": 2.621118354797363, "step": 48910 }, { "epoch": 0.3948887256524301, "grad_norm": 0.9960248470306396, "learning_rate": 1.2110207345540901e-05, "loss": 3.043488311767578, "step": 48920 }, { "epoch": 0.3949694469781972, "grad_norm": 1.0069029331207275, "learning_rate": 1.2108591875802687e-05, "loss": 2.5525279998779298, "step": 48930 }, { "epoch": 0.3950501683039642, "grad_norm": 1.0321176052093506, "learning_rate": 1.2106976406064475e-05, "loss": 2.926194190979004, "step": 48940 }, { "epoch": 0.39513088962973125, "grad_norm": 1.0024521350860596, "learning_rate": 1.2105360936326261e-05, "loss": 2.784414863586426, "step": 48950 }, { "epoch": 0.39521161095549834, "grad_norm": 1.300945520401001, "learning_rate": 1.2103745466588049e-05, "loss": 2.400753402709961, "step": 48960 }, { "epoch": 0.39529233228126537, "grad_norm": 1.0000255107879639, "learning_rate": 1.2102129996849835e-05, "loss": 3.20625, "step": 48970 }, { "epoch": 0.39537305360703245, "grad_norm": 0.931063711643219, "learning_rate": 1.2100514527111622e-05, "loss": 2.9161211013793946, "step": 48980 }, { "epoch": 0.3954537749327995, "grad_norm": 1.061972975730896, "learning_rate": 1.2098899057373408e-05, "loss": 2.6923185348510743, "step": 48990 }, { "epoch": 0.39553449625856657, "grad_norm": 1.0288810729980469, "learning_rate": 1.2097283587635196e-05, "loss": 2.979235076904297, "step": 49000 }, { "epoch": 0.3956152175843336, "grad_norm": 1.3745867013931274, "learning_rate": 1.2095668117896982e-05, "loss": 2.9225419998168944, "step": 49010 }, { "epoch": 0.3956959389101007, "grad_norm": 1.2429119348526, "learning_rate": 1.209405264815877e-05, "loss": 3.4087867736816406, "step": 49020 }, { "epoch": 0.3957766602358677, "grad_norm": 1.2386901378631592, "learning_rate": 1.2092437178420556e-05, "loss": 3.034068298339844, "step": 49030 }, { "epoch": 0.39585738156163475, "grad_norm": 0.8036497235298157, "learning_rate": 1.2090821708682343e-05, "loss": 2.449328804016113, "step": 49040 }, { "epoch": 0.39593810288740183, "grad_norm": 1.005906105041504, "learning_rate": 1.208920623894413e-05, "loss": 3.1103662490844726, "step": 49050 }, { "epoch": 0.39601882421316886, "grad_norm": 1.8246955871582031, "learning_rate": 1.2087590769205917e-05, "loss": 3.047201728820801, "step": 49060 }, { "epoch": 0.39609954553893595, "grad_norm": 0.6604504585266113, "learning_rate": 1.2085975299467703e-05, "loss": 3.0188180923461916, "step": 49070 }, { "epoch": 0.396180266864703, "grad_norm": 0.99028080701828, "learning_rate": 1.208435982972949e-05, "loss": 2.9167236328125, "step": 49080 }, { "epoch": 0.39626098819047006, "grad_norm": 1.10819411277771, "learning_rate": 1.2082744359991277e-05, "loss": 2.4633457183837892, "step": 49090 }, { "epoch": 0.3963417095162371, "grad_norm": 0.9356212019920349, "learning_rate": 1.2081128890253064e-05, "loss": 3.246197509765625, "step": 49100 }, { "epoch": 0.3964224308420041, "grad_norm": 0.8848832249641418, "learning_rate": 1.207951342051485e-05, "loss": 2.5348358154296875, "step": 49110 }, { "epoch": 0.3965031521677712, "grad_norm": 1.2320001125335693, "learning_rate": 1.2077897950776638e-05, "loss": 2.746323013305664, "step": 49120 }, { "epoch": 0.39658387349353824, "grad_norm": 0.8040656447410583, "learning_rate": 1.2076282481038426e-05, "loss": 2.949171257019043, "step": 49130 }, { "epoch": 0.3966645948193053, "grad_norm": 0.9503738284111023, "learning_rate": 1.2074667011300212e-05, "loss": 2.892444610595703, "step": 49140 }, { "epoch": 0.39674531614507236, "grad_norm": 0.9051498770713806, "learning_rate": 1.2073051541562e-05, "loss": 3.287240219116211, "step": 49150 }, { "epoch": 0.39682603747083944, "grad_norm": 0.8030309677124023, "learning_rate": 1.2071436071823786e-05, "loss": 3.5417724609375, "step": 49160 }, { "epoch": 0.3969067587966065, "grad_norm": 1.063096046447754, "learning_rate": 1.2069820602085573e-05, "loss": 3.0504219055175783, "step": 49170 }, { "epoch": 0.3969874801223735, "grad_norm": 1.1073808670043945, "learning_rate": 1.206820513234736e-05, "loss": 2.5452392578125, "step": 49180 }, { "epoch": 0.3970682014481406, "grad_norm": 0.845237672328949, "learning_rate": 1.2066589662609147e-05, "loss": 3.2162803649902343, "step": 49190 }, { "epoch": 0.3971489227739076, "grad_norm": 0.9320453405380249, "learning_rate": 1.2064974192870933e-05, "loss": 2.9969833374023436, "step": 49200 }, { "epoch": 0.3972296440996747, "grad_norm": 0.6893805265426636, "learning_rate": 1.206335872313272e-05, "loss": 2.920872688293457, "step": 49210 }, { "epoch": 0.39731036542544174, "grad_norm": 0.9227913618087769, "learning_rate": 1.2061743253394507e-05, "loss": 2.7841909408569334, "step": 49220 }, { "epoch": 0.3973910867512088, "grad_norm": 0.9073241353034973, "learning_rate": 1.2060127783656294e-05, "loss": 2.883890151977539, "step": 49230 }, { "epoch": 0.39747180807697585, "grad_norm": 1.177446961402893, "learning_rate": 1.205851231391808e-05, "loss": 2.8041929244995116, "step": 49240 }, { "epoch": 0.39755252940274294, "grad_norm": 1.363802433013916, "learning_rate": 1.2056896844179868e-05, "loss": 2.8174095153808594, "step": 49250 }, { "epoch": 0.39763325072850997, "grad_norm": 1.0992603302001953, "learning_rate": 1.2055281374441654e-05, "loss": 3.0860342025756835, "step": 49260 }, { "epoch": 0.397713972054277, "grad_norm": 0.6185312271118164, "learning_rate": 1.2053665904703442e-05, "loss": 2.8588932037353514, "step": 49270 }, { "epoch": 0.3977946933800441, "grad_norm": 0.9502148628234863, "learning_rate": 1.2052050434965228e-05, "loss": 2.7554929733276365, "step": 49280 }, { "epoch": 0.3978754147058111, "grad_norm": 0.9651452898979187, "learning_rate": 1.2050434965227015e-05, "loss": 3.0071956634521486, "step": 49290 }, { "epoch": 0.3979561360315782, "grad_norm": 0.9439098238945007, "learning_rate": 1.2048819495488801e-05, "loss": 2.8883779525756834, "step": 49300 }, { "epoch": 0.39803685735734523, "grad_norm": 0.9993108510971069, "learning_rate": 1.2047204025750589e-05, "loss": 3.082401466369629, "step": 49310 }, { "epoch": 0.3981175786831123, "grad_norm": 0.887516975402832, "learning_rate": 1.2045588556012375e-05, "loss": 2.680339050292969, "step": 49320 }, { "epoch": 0.39819830000887935, "grad_norm": 0.9871655702590942, "learning_rate": 1.2043973086274163e-05, "loss": 3.0604835510253907, "step": 49330 }, { "epoch": 0.3982790213346464, "grad_norm": 0.8963265419006348, "learning_rate": 1.2042357616535949e-05, "loss": 3.042799949645996, "step": 49340 }, { "epoch": 0.39835974266041346, "grad_norm": 1.0338854789733887, "learning_rate": 1.2040742146797736e-05, "loss": 2.794871711730957, "step": 49350 }, { "epoch": 0.3984404639861805, "grad_norm": 0.909396231174469, "learning_rate": 1.2039126677059522e-05, "loss": 2.4139030456542967, "step": 49360 }, { "epoch": 0.3985211853119476, "grad_norm": 0.948826789855957, "learning_rate": 1.203751120732131e-05, "loss": 2.80039005279541, "step": 49370 }, { "epoch": 0.3986019066377146, "grad_norm": 0.8866267800331116, "learning_rate": 1.2035895737583096e-05, "loss": 3.1537664413452147, "step": 49380 }, { "epoch": 0.3986826279634817, "grad_norm": 0.8756435513496399, "learning_rate": 1.2034280267844884e-05, "loss": 2.801142120361328, "step": 49390 }, { "epoch": 0.3987633492892487, "grad_norm": 1.4369555711746216, "learning_rate": 1.203266479810667e-05, "loss": 2.748609733581543, "step": 49400 }, { "epoch": 0.39884407061501576, "grad_norm": 0.615917980670929, "learning_rate": 1.2031049328368458e-05, "loss": 2.9523221969604494, "step": 49410 }, { "epoch": 0.39892479194078284, "grad_norm": 0.8018450736999512, "learning_rate": 1.2029433858630244e-05, "loss": 2.758449745178223, "step": 49420 }, { "epoch": 0.39900551326654987, "grad_norm": 0.9529198408126831, "learning_rate": 1.2027818388892031e-05, "loss": 2.5503997802734375, "step": 49430 }, { "epoch": 0.39908623459231696, "grad_norm": 0.8600413799285889, "learning_rate": 1.2026202919153817e-05, "loss": 2.809139442443848, "step": 49440 }, { "epoch": 0.399166955918084, "grad_norm": 1.0211122035980225, "learning_rate": 1.2024587449415605e-05, "loss": 2.7279911041259766, "step": 49450 }, { "epoch": 0.3992476772438511, "grad_norm": 1.536751627922058, "learning_rate": 1.2022971979677391e-05, "loss": 3.181371307373047, "step": 49460 }, { "epoch": 0.3993283985696181, "grad_norm": 1.413822054862976, "learning_rate": 1.2021356509939179e-05, "loss": 3.1378040313720703, "step": 49470 }, { "epoch": 0.3994091198953852, "grad_norm": 1.128636121749878, "learning_rate": 1.2019741040200965e-05, "loss": 3.239679718017578, "step": 49480 }, { "epoch": 0.3994898412211522, "grad_norm": 1.0980587005615234, "learning_rate": 1.2018125570462752e-05, "loss": 2.8922462463378906, "step": 49490 }, { "epoch": 0.39957056254691925, "grad_norm": 0.6831726431846619, "learning_rate": 1.2016510100724538e-05, "loss": 2.5515886306762696, "step": 49500 }, { "epoch": 0.39965128387268634, "grad_norm": 0.6257433891296387, "learning_rate": 1.2014894630986326e-05, "loss": 2.706254577636719, "step": 49510 }, { "epoch": 0.39973200519845337, "grad_norm": 1.4186748266220093, "learning_rate": 1.2013279161248112e-05, "loss": 3.150025177001953, "step": 49520 }, { "epoch": 0.39981272652422045, "grad_norm": 0.8318616151809692, "learning_rate": 1.2011663691509901e-05, "loss": 2.819413948059082, "step": 49530 }, { "epoch": 0.3998934478499875, "grad_norm": 1.1823585033416748, "learning_rate": 1.2010048221771686e-05, "loss": 2.9245134353637696, "step": 49540 }, { "epoch": 0.39997416917575457, "grad_norm": 0.8105467557907104, "learning_rate": 1.2008432752033475e-05, "loss": 3.4163528442382813, "step": 49550 }, { "epoch": 0.4000548905015216, "grad_norm": 0.9593014717102051, "learning_rate": 1.200681728229526e-05, "loss": 3.2897933959960937, "step": 49560 }, { "epoch": 0.40013561182728863, "grad_norm": 1.0345616340637207, "learning_rate": 1.2005201812557049e-05, "loss": 2.459636116027832, "step": 49570 }, { "epoch": 0.4002163331530557, "grad_norm": 1.757820963859558, "learning_rate": 1.2003586342818833e-05, "loss": 3.2068191528320313, "step": 49580 }, { "epoch": 0.40029705447882274, "grad_norm": 0.7656896114349365, "learning_rate": 1.2001970873080622e-05, "loss": 2.479025459289551, "step": 49590 }, { "epoch": 0.40037777580458983, "grad_norm": 0.7194562554359436, "learning_rate": 1.2000355403342407e-05, "loss": 3.234843063354492, "step": 49600 }, { "epoch": 0.40045849713035686, "grad_norm": 0.9261488318443298, "learning_rate": 1.1998739933604196e-05, "loss": 2.962770462036133, "step": 49610 }, { "epoch": 0.40053921845612395, "grad_norm": 1.0674580335617065, "learning_rate": 1.199712446386598e-05, "loss": 2.6705463409423826, "step": 49620 }, { "epoch": 0.400619939781891, "grad_norm": 1.2596988677978516, "learning_rate": 1.199550899412777e-05, "loss": 2.7796485900878904, "step": 49630 }, { "epoch": 0.400700661107658, "grad_norm": 0.699646532535553, "learning_rate": 1.1993893524389554e-05, "loss": 2.607369804382324, "step": 49640 }, { "epoch": 0.4007813824334251, "grad_norm": 1.1117738485336304, "learning_rate": 1.1992278054651344e-05, "loss": 2.798331642150879, "step": 49650 }, { "epoch": 0.4008621037591921, "grad_norm": 0.9606999158859253, "learning_rate": 1.1990662584913128e-05, "loss": 2.45644474029541, "step": 49660 }, { "epoch": 0.4009428250849592, "grad_norm": 0.902434766292572, "learning_rate": 1.1989047115174917e-05, "loss": 2.6224966049194336, "step": 49670 }, { "epoch": 0.40102354641072624, "grad_norm": 0.9314649701118469, "learning_rate": 1.1987431645436702e-05, "loss": 3.021192169189453, "step": 49680 }, { "epoch": 0.4011042677364933, "grad_norm": 0.7691655158996582, "learning_rate": 1.1985816175698491e-05, "loss": 3.2139888763427735, "step": 49690 }, { "epoch": 0.40118498906226036, "grad_norm": 0.6907702088356018, "learning_rate": 1.1984200705960277e-05, "loss": 2.9632097244262696, "step": 49700 }, { "epoch": 0.4012657103880274, "grad_norm": 0.5441285371780396, "learning_rate": 1.1982585236222065e-05, "loss": 2.701658248901367, "step": 49710 }, { "epoch": 0.40134643171379447, "grad_norm": 1.481759786605835, "learning_rate": 1.198096976648385e-05, "loss": 3.2626968383789063, "step": 49720 }, { "epoch": 0.4014271530395615, "grad_norm": 0.9486079216003418, "learning_rate": 1.1979354296745638e-05, "loss": 2.6595483779907227, "step": 49730 }, { "epoch": 0.4015078743653286, "grad_norm": 0.5292319655418396, "learning_rate": 1.1977738827007424e-05, "loss": 2.7618101119995115, "step": 49740 }, { "epoch": 0.4015885956910956, "grad_norm": 0.6000354290008545, "learning_rate": 1.1976123357269212e-05, "loss": 2.907927322387695, "step": 49750 }, { "epoch": 0.4016693170168627, "grad_norm": 1.0201311111450195, "learning_rate": 1.1974507887530998e-05, "loss": 3.0890243530273436, "step": 49760 }, { "epoch": 0.40175003834262973, "grad_norm": 0.7526915073394775, "learning_rate": 1.1972892417792786e-05, "loss": 2.8917823791503907, "step": 49770 }, { "epoch": 0.4018307596683968, "grad_norm": 1.2634735107421875, "learning_rate": 1.1971276948054572e-05, "loss": 3.207168197631836, "step": 49780 }, { "epoch": 0.40191148099416385, "grad_norm": 1.091630220413208, "learning_rate": 1.196966147831636e-05, "loss": 3.470253753662109, "step": 49790 }, { "epoch": 0.4019922023199309, "grad_norm": 1.0339549779891968, "learning_rate": 1.1968046008578145e-05, "loss": 3.324668121337891, "step": 49800 }, { "epoch": 0.40207292364569797, "grad_norm": 0.8287491798400879, "learning_rate": 1.1966430538839933e-05, "loss": 2.5106273651123048, "step": 49810 }, { "epoch": 0.402153644971465, "grad_norm": 0.7268654704093933, "learning_rate": 1.1964815069101719e-05, "loss": 2.5148950576782227, "step": 49820 }, { "epoch": 0.4022343662972321, "grad_norm": 1.4655473232269287, "learning_rate": 1.1963199599363507e-05, "loss": 2.6389360427856445, "step": 49830 }, { "epoch": 0.4023150876229991, "grad_norm": 0.8291094899177551, "learning_rate": 1.1961584129625293e-05, "loss": 2.7792179107666017, "step": 49840 }, { "epoch": 0.4023958089487662, "grad_norm": 0.9103687405586243, "learning_rate": 1.195996865988708e-05, "loss": 3.056760787963867, "step": 49850 }, { "epoch": 0.40247653027453323, "grad_norm": 1.4232591390609741, "learning_rate": 1.1958353190148866e-05, "loss": 3.0841712951660156, "step": 49860 }, { "epoch": 0.40255725160030026, "grad_norm": 1.4366694688796997, "learning_rate": 1.1956737720410654e-05, "loss": 3.3277236938476564, "step": 49870 }, { "epoch": 0.40263797292606734, "grad_norm": 0.8806700706481934, "learning_rate": 1.195512225067244e-05, "loss": 3.2446109771728517, "step": 49880 }, { "epoch": 0.4027186942518344, "grad_norm": 1.3642147779464722, "learning_rate": 1.1953506780934228e-05, "loss": 3.204620361328125, "step": 49890 }, { "epoch": 0.40279941557760146, "grad_norm": 0.9252341985702515, "learning_rate": 1.1951891311196014e-05, "loss": 2.519513702392578, "step": 49900 }, { "epoch": 0.4028801369033685, "grad_norm": 1.061537265777588, "learning_rate": 1.1950275841457801e-05, "loss": 3.219020462036133, "step": 49910 }, { "epoch": 0.4029608582291356, "grad_norm": 0.9798766374588013, "learning_rate": 1.1948660371719587e-05, "loss": 2.685077667236328, "step": 49920 }, { "epoch": 0.4030415795549026, "grad_norm": 0.8542972803115845, "learning_rate": 1.1947044901981375e-05, "loss": 2.8806018829345703, "step": 49930 }, { "epoch": 0.40312230088066964, "grad_norm": 1.152379035949707, "learning_rate": 1.1945429432243161e-05, "loss": 3.0787431716918947, "step": 49940 }, { "epoch": 0.4032030222064367, "grad_norm": 1.221663236618042, "learning_rate": 1.1943813962504949e-05, "loss": 3.2043418884277344, "step": 49950 }, { "epoch": 0.40328374353220375, "grad_norm": 0.8042710423469543, "learning_rate": 1.1942198492766735e-05, "loss": 2.568358612060547, "step": 49960 }, { "epoch": 0.40336446485797084, "grad_norm": 0.6101638078689575, "learning_rate": 1.1940583023028523e-05, "loss": 3.1123538970947267, "step": 49970 }, { "epoch": 0.40344518618373787, "grad_norm": 0.8300483822822571, "learning_rate": 1.1938967553290309e-05, "loss": 3.1586803436279296, "step": 49980 }, { "epoch": 0.40352590750950496, "grad_norm": 0.9942092895507812, "learning_rate": 1.1937352083552096e-05, "loss": 2.8057647705078126, "step": 49990 }, { "epoch": 0.403606628835272, "grad_norm": 0.6600027680397034, "learning_rate": 1.1935736613813882e-05, "loss": 3.182280731201172, "step": 50000 }, { "epoch": 0.40368735016103907, "grad_norm": 0.9902964234352112, "learning_rate": 1.193412114407567e-05, "loss": 2.9879476547241213, "step": 50010 }, { "epoch": 0.4037680714868061, "grad_norm": 1.457931399345398, "learning_rate": 1.1932505674337456e-05, "loss": 2.4964807510375975, "step": 50020 }, { "epoch": 0.40384879281257313, "grad_norm": 1.42750084400177, "learning_rate": 1.1930890204599244e-05, "loss": 3.0254350662231446, "step": 50030 }, { "epoch": 0.4039295141383402, "grad_norm": 1.3426003456115723, "learning_rate": 1.192927473486103e-05, "loss": 3.0293062210083006, "step": 50040 }, { "epoch": 0.40401023546410725, "grad_norm": 0.9622653126716614, "learning_rate": 1.1927659265122817e-05, "loss": 3.1130598068237303, "step": 50050 }, { "epoch": 0.40409095678987433, "grad_norm": 1.2222486734390259, "learning_rate": 1.1926043795384603e-05, "loss": 3.0543556213378906, "step": 50060 }, { "epoch": 0.40417167811564136, "grad_norm": 0.9901844263076782, "learning_rate": 1.1924428325646391e-05, "loss": 2.5815879821777346, "step": 50070 }, { "epoch": 0.40425239944140845, "grad_norm": 0.6778108477592468, "learning_rate": 1.1922812855908177e-05, "loss": 2.759740447998047, "step": 50080 }, { "epoch": 0.4043331207671755, "grad_norm": 0.8944692015647888, "learning_rate": 1.1921197386169965e-05, "loss": 2.834079551696777, "step": 50090 }, { "epoch": 0.4044138420929425, "grad_norm": 1.263346552848816, "learning_rate": 1.191958191643175e-05, "loss": 3.2729183197021485, "step": 50100 }, { "epoch": 0.4044945634187096, "grad_norm": 0.8373952507972717, "learning_rate": 1.1917966446693538e-05, "loss": 2.827102851867676, "step": 50110 }, { "epoch": 0.4045752847444766, "grad_norm": 1.4941532611846924, "learning_rate": 1.1916350976955324e-05, "loss": 3.20428581237793, "step": 50120 }, { "epoch": 0.4046560060702437, "grad_norm": 1.1348482370376587, "learning_rate": 1.1914735507217112e-05, "loss": 3.142974281311035, "step": 50130 }, { "epoch": 0.40473672739601074, "grad_norm": 0.8382717967033386, "learning_rate": 1.1913120037478898e-05, "loss": 2.73617000579834, "step": 50140 }, { "epoch": 0.40481744872177783, "grad_norm": 1.4998340606689453, "learning_rate": 1.1911504567740686e-05, "loss": 2.6033348083496093, "step": 50150 }, { "epoch": 0.40489817004754486, "grad_norm": 0.6780884861946106, "learning_rate": 1.1909889098002472e-05, "loss": 3.1636163711547853, "step": 50160 }, { "epoch": 0.4049788913733119, "grad_norm": 1.0259588956832886, "learning_rate": 1.190827362826426e-05, "loss": 3.085023307800293, "step": 50170 }, { "epoch": 0.405059612699079, "grad_norm": 0.9190975427627563, "learning_rate": 1.1906658158526045e-05, "loss": 2.8953567504882813, "step": 50180 }, { "epoch": 0.405140334024846, "grad_norm": 0.7840576767921448, "learning_rate": 1.1905042688787833e-05, "loss": 2.958943557739258, "step": 50190 }, { "epoch": 0.4052210553506131, "grad_norm": 0.9530982375144958, "learning_rate": 1.190342721904962e-05, "loss": 2.9511743545532227, "step": 50200 }, { "epoch": 0.4053017766763801, "grad_norm": 0.9059482216835022, "learning_rate": 1.1901811749311407e-05, "loss": 2.7891538619995115, "step": 50210 }, { "epoch": 0.4053824980021472, "grad_norm": 0.9107838869094849, "learning_rate": 1.1900196279573193e-05, "loss": 3.25976448059082, "step": 50220 }, { "epoch": 0.40546321932791424, "grad_norm": 0.6848693490028381, "learning_rate": 1.189858080983498e-05, "loss": 2.8781829833984376, "step": 50230 }, { "epoch": 0.4055439406536813, "grad_norm": 1.2431515455245972, "learning_rate": 1.1896965340096767e-05, "loss": 2.8291778564453125, "step": 50240 }, { "epoch": 0.40562466197944835, "grad_norm": 0.7838724255561829, "learning_rate": 1.1895349870358554e-05, "loss": 2.808681869506836, "step": 50250 }, { "epoch": 0.4057053833052154, "grad_norm": 0.7984434962272644, "learning_rate": 1.189373440062034e-05, "loss": 2.706964302062988, "step": 50260 }, { "epoch": 0.40578610463098247, "grad_norm": 0.6514309048652649, "learning_rate": 1.1892118930882128e-05, "loss": 3.1060258865356447, "step": 50270 }, { "epoch": 0.4058668259567495, "grad_norm": 0.8960868120193481, "learning_rate": 1.1890503461143914e-05, "loss": 2.5154836654663084, "step": 50280 }, { "epoch": 0.4059475472825166, "grad_norm": 0.9655497074127197, "learning_rate": 1.1888887991405702e-05, "loss": 2.952366065979004, "step": 50290 }, { "epoch": 0.4060282686082836, "grad_norm": 1.0738669633865356, "learning_rate": 1.1887272521667488e-05, "loss": 2.954641342163086, "step": 50300 }, { "epoch": 0.4061089899340507, "grad_norm": 1.130043625831604, "learning_rate": 1.1885657051929275e-05, "loss": 3.0370540618896484, "step": 50310 }, { "epoch": 0.40618971125981773, "grad_norm": 0.6186408996582031, "learning_rate": 1.1884041582191061e-05, "loss": 2.655797004699707, "step": 50320 }, { "epoch": 0.40627043258558476, "grad_norm": 0.9343775510787964, "learning_rate": 1.1882426112452849e-05, "loss": 2.6476621627807617, "step": 50330 }, { "epoch": 0.40635115391135185, "grad_norm": 0.7026702165603638, "learning_rate": 1.1880810642714635e-05, "loss": 2.7722978591918945, "step": 50340 }, { "epoch": 0.4064318752371189, "grad_norm": 0.9367664456367493, "learning_rate": 1.1879195172976423e-05, "loss": 3.1858470916748045, "step": 50350 }, { "epoch": 0.40651259656288596, "grad_norm": 0.8589795827865601, "learning_rate": 1.1877579703238209e-05, "loss": 3.2439064025878905, "step": 50360 }, { "epoch": 0.406593317888653, "grad_norm": 1.5648117065429688, "learning_rate": 1.1875964233499996e-05, "loss": 2.704199028015137, "step": 50370 }, { "epoch": 0.4066740392144201, "grad_norm": 1.1498231887817383, "learning_rate": 1.1874348763761784e-05, "loss": 2.9985727310180663, "step": 50380 }, { "epoch": 0.4067547605401871, "grad_norm": 2.5954010486602783, "learning_rate": 1.187273329402357e-05, "loss": 2.9978879928588866, "step": 50390 }, { "epoch": 0.40683548186595414, "grad_norm": 1.8316313028335571, "learning_rate": 1.187111782428536e-05, "loss": 3.0575380325317383, "step": 50400 }, { "epoch": 0.4069162031917212, "grad_norm": 0.6835392117500305, "learning_rate": 1.1869502354547144e-05, "loss": 3.3013900756835937, "step": 50410 }, { "epoch": 0.40699692451748826, "grad_norm": 1.6951534748077393, "learning_rate": 1.1867886884808933e-05, "loss": 2.859977912902832, "step": 50420 }, { "epoch": 0.40707764584325534, "grad_norm": 0.5290747284889221, "learning_rate": 1.1866271415070717e-05, "loss": 3.0505840301513674, "step": 50430 }, { "epoch": 0.4071583671690224, "grad_norm": 1.0468404293060303, "learning_rate": 1.1864655945332507e-05, "loss": 2.9559993743896484, "step": 50440 }, { "epoch": 0.40723908849478946, "grad_norm": 0.727609395980835, "learning_rate": 1.1863040475594291e-05, "loss": 2.535488510131836, "step": 50450 }, { "epoch": 0.4073198098205565, "grad_norm": 1.299033522605896, "learning_rate": 1.186142500585608e-05, "loss": 3.039982223510742, "step": 50460 }, { "epoch": 0.4074005311463236, "grad_norm": 0.9419155120849609, "learning_rate": 1.1859809536117865e-05, "loss": 2.7671396255493166, "step": 50470 }, { "epoch": 0.4074812524720906, "grad_norm": 0.8338165283203125, "learning_rate": 1.1858194066379654e-05, "loss": 2.876755142211914, "step": 50480 }, { "epoch": 0.40756197379785764, "grad_norm": 0.6314684748649597, "learning_rate": 1.1856578596641439e-05, "loss": 3.1171823501586915, "step": 50490 }, { "epoch": 0.4076426951236247, "grad_norm": 0.862206757068634, "learning_rate": 1.1854963126903228e-05, "loss": 2.8778594970703124, "step": 50500 }, { "epoch": 0.40772341644939175, "grad_norm": 0.8971342444419861, "learning_rate": 1.1853347657165012e-05, "loss": 2.8237735748291017, "step": 50510 }, { "epoch": 0.40780413777515884, "grad_norm": 1.5417693853378296, "learning_rate": 1.1851732187426802e-05, "loss": 3.1680173873901367, "step": 50520 }, { "epoch": 0.40788485910092587, "grad_norm": 1.640410304069519, "learning_rate": 1.1850116717688586e-05, "loss": 2.563779830932617, "step": 50530 }, { "epoch": 0.40796558042669295, "grad_norm": 1.112412691116333, "learning_rate": 1.1848501247950375e-05, "loss": 2.972715950012207, "step": 50540 }, { "epoch": 0.40804630175246, "grad_norm": 0.6606821417808533, "learning_rate": 1.184688577821216e-05, "loss": 2.66503963470459, "step": 50550 }, { "epoch": 0.408127023078227, "grad_norm": 0.7479551434516907, "learning_rate": 1.1845270308473949e-05, "loss": 2.902629280090332, "step": 50560 }, { "epoch": 0.4082077444039941, "grad_norm": 0.6047378778457642, "learning_rate": 1.1843654838735735e-05, "loss": 2.9863351821899413, "step": 50570 }, { "epoch": 0.40828846572976113, "grad_norm": 1.1046249866485596, "learning_rate": 1.1842039368997523e-05, "loss": 2.5526960372924803, "step": 50580 }, { "epoch": 0.4083691870555282, "grad_norm": 1.1640948057174683, "learning_rate": 1.1840423899259309e-05, "loss": 2.9994115829467773, "step": 50590 }, { "epoch": 0.40844990838129525, "grad_norm": 0.968104362487793, "learning_rate": 1.1838808429521096e-05, "loss": 3.048457717895508, "step": 50600 }, { "epoch": 0.40853062970706233, "grad_norm": 0.7432757019996643, "learning_rate": 1.1837192959782882e-05, "loss": 3.110112953186035, "step": 50610 }, { "epoch": 0.40861135103282936, "grad_norm": 1.271990180015564, "learning_rate": 1.183557749004467e-05, "loss": 2.8863224029541015, "step": 50620 }, { "epoch": 0.4086920723585964, "grad_norm": 0.9040367007255554, "learning_rate": 1.1833962020306456e-05, "loss": 2.9125694274902343, "step": 50630 }, { "epoch": 0.4087727936843635, "grad_norm": 0.9615034461021423, "learning_rate": 1.1832346550568244e-05, "loss": 2.8673690795898437, "step": 50640 }, { "epoch": 0.4088535150101305, "grad_norm": 0.7690719962120056, "learning_rate": 1.183073108083003e-05, "loss": 2.7048032760620115, "step": 50650 }, { "epoch": 0.4089342363358976, "grad_norm": 0.8223295211791992, "learning_rate": 1.1829115611091817e-05, "loss": 2.6427093505859376, "step": 50660 }, { "epoch": 0.4090149576616646, "grad_norm": 1.073302984237671, "learning_rate": 1.1827500141353603e-05, "loss": 2.9635786056518554, "step": 50670 }, { "epoch": 0.4090956789874317, "grad_norm": 0.8400455713272095, "learning_rate": 1.1825884671615391e-05, "loss": 3.0186155319213865, "step": 50680 }, { "epoch": 0.40917640031319874, "grad_norm": 1.1779404878616333, "learning_rate": 1.1824269201877177e-05, "loss": 2.8726930618286133, "step": 50690 }, { "epoch": 0.40925712163896577, "grad_norm": 1.4149692058563232, "learning_rate": 1.1822653732138965e-05, "loss": 2.7832271575927736, "step": 50700 }, { "epoch": 0.40933784296473286, "grad_norm": 0.683355450630188, "learning_rate": 1.1821038262400751e-05, "loss": 2.8021106719970703, "step": 50710 }, { "epoch": 0.4094185642904999, "grad_norm": 1.0855755805969238, "learning_rate": 1.1819422792662539e-05, "loss": 2.7119237899780275, "step": 50720 }, { "epoch": 0.409499285616267, "grad_norm": 0.7103063464164734, "learning_rate": 1.1817807322924325e-05, "loss": 2.8536617279052736, "step": 50730 }, { "epoch": 0.409580006942034, "grad_norm": 0.9476026296615601, "learning_rate": 1.1816191853186112e-05, "loss": 2.9763036727905274, "step": 50740 }, { "epoch": 0.4096607282678011, "grad_norm": 0.848513662815094, "learning_rate": 1.1814576383447898e-05, "loss": 2.6742326736450197, "step": 50750 }, { "epoch": 0.4097414495935681, "grad_norm": 1.2993745803833008, "learning_rate": 1.1812960913709686e-05, "loss": 2.566445159912109, "step": 50760 }, { "epoch": 0.4098221709193352, "grad_norm": 1.1157565116882324, "learning_rate": 1.1811345443971472e-05, "loss": 2.6126874923706054, "step": 50770 }, { "epoch": 0.40990289224510223, "grad_norm": 0.7743813991546631, "learning_rate": 1.180972997423326e-05, "loss": 2.993402862548828, "step": 50780 }, { "epoch": 0.40998361357086927, "grad_norm": 0.6465250253677368, "learning_rate": 1.1808114504495046e-05, "loss": 2.5840280532836912, "step": 50790 }, { "epoch": 0.41006433489663635, "grad_norm": 0.9718100428581238, "learning_rate": 1.1806499034756833e-05, "loss": 3.209055709838867, "step": 50800 }, { "epoch": 0.4101450562224034, "grad_norm": 0.9147691130638123, "learning_rate": 1.180488356501862e-05, "loss": 3.1001588821411135, "step": 50810 }, { "epoch": 0.41022577754817047, "grad_norm": 1.1561646461486816, "learning_rate": 1.1803268095280407e-05, "loss": 2.9147926330566407, "step": 50820 }, { "epoch": 0.4103064988739375, "grad_norm": 0.6661585569381714, "learning_rate": 1.1801652625542193e-05, "loss": 2.771625518798828, "step": 50830 }, { "epoch": 0.4103872201997046, "grad_norm": 0.9698095321655273, "learning_rate": 1.180003715580398e-05, "loss": 2.996306610107422, "step": 50840 }, { "epoch": 0.4104679415254716, "grad_norm": 0.5316442847251892, "learning_rate": 1.1798421686065767e-05, "loss": 2.581366539001465, "step": 50850 }, { "epoch": 0.41054866285123864, "grad_norm": 0.6177887916564941, "learning_rate": 1.1796806216327554e-05, "loss": 2.6490451812744142, "step": 50860 }, { "epoch": 0.41062938417700573, "grad_norm": 0.6072285771369934, "learning_rate": 1.179519074658934e-05, "loss": 2.5367298126220703, "step": 50870 }, { "epoch": 0.41071010550277276, "grad_norm": 1.6703662872314453, "learning_rate": 1.1793575276851128e-05, "loss": 3.5158412933349608, "step": 50880 }, { "epoch": 0.41079082682853985, "grad_norm": 0.761816143989563, "learning_rate": 1.1791959807112914e-05, "loss": 2.9788841247558593, "step": 50890 }, { "epoch": 0.4108715481543069, "grad_norm": 0.8614269495010376, "learning_rate": 1.1790344337374702e-05, "loss": 2.633802795410156, "step": 50900 }, { "epoch": 0.41095226948007396, "grad_norm": 0.6089040637016296, "learning_rate": 1.1788728867636488e-05, "loss": 2.9633636474609375, "step": 50910 }, { "epoch": 0.411032990805841, "grad_norm": 0.9270999431610107, "learning_rate": 1.1787113397898275e-05, "loss": 2.9604591369628905, "step": 50920 }, { "epoch": 0.411113712131608, "grad_norm": 0.813933789730072, "learning_rate": 1.1785497928160061e-05, "loss": 2.58837833404541, "step": 50930 }, { "epoch": 0.4111944334573751, "grad_norm": 1.2516663074493408, "learning_rate": 1.1783882458421849e-05, "loss": 2.520097351074219, "step": 50940 }, { "epoch": 0.41127515478314214, "grad_norm": 0.868634045124054, "learning_rate": 1.1782266988683635e-05, "loss": 3.1704492568969727, "step": 50950 }, { "epoch": 0.4113558761089092, "grad_norm": 0.8743923902511597, "learning_rate": 1.1780651518945423e-05, "loss": 2.6646074295043944, "step": 50960 }, { "epoch": 0.41143659743467625, "grad_norm": 0.8142127990722656, "learning_rate": 1.1779036049207209e-05, "loss": 2.817296028137207, "step": 50970 }, { "epoch": 0.41151731876044334, "grad_norm": 1.1958112716674805, "learning_rate": 1.1777420579468997e-05, "loss": 2.6514398574829103, "step": 50980 }, { "epoch": 0.41159804008621037, "grad_norm": 0.6601693034172058, "learning_rate": 1.1775805109730783e-05, "loss": 2.8486679077148436, "step": 50990 }, { "epoch": 0.41167876141197746, "grad_norm": 0.9554387331008911, "learning_rate": 1.177418963999257e-05, "loss": 3.0653461456298827, "step": 51000 }, { "epoch": 0.4117594827377445, "grad_norm": 1.1088178157806396, "learning_rate": 1.1772574170254356e-05, "loss": 2.6809261322021483, "step": 51010 }, { "epoch": 0.4118402040635115, "grad_norm": 0.8069183230400085, "learning_rate": 1.1770958700516144e-05, "loss": 2.82330436706543, "step": 51020 }, { "epoch": 0.4119209253892786, "grad_norm": 0.8043506145477295, "learning_rate": 1.176934323077793e-05, "loss": 2.961159324645996, "step": 51030 }, { "epoch": 0.41200164671504563, "grad_norm": 0.7030649185180664, "learning_rate": 1.1767727761039718e-05, "loss": 2.704616355895996, "step": 51040 }, { "epoch": 0.4120823680408127, "grad_norm": 0.9242509007453918, "learning_rate": 1.1766112291301504e-05, "loss": 3.0594024658203125, "step": 51050 }, { "epoch": 0.41216308936657975, "grad_norm": 1.4479882717132568, "learning_rate": 1.1764496821563291e-05, "loss": 2.945989799499512, "step": 51060 }, { "epoch": 0.41224381069234683, "grad_norm": 1.1261626482009888, "learning_rate": 1.1762881351825077e-05, "loss": 3.5183856964111326, "step": 51070 }, { "epoch": 0.41232453201811387, "grad_norm": 0.9517770409584045, "learning_rate": 1.1761265882086865e-05, "loss": 2.5622949600219727, "step": 51080 }, { "epoch": 0.4124052533438809, "grad_norm": 1.3260902166366577, "learning_rate": 1.1759650412348651e-05, "loss": 2.898147392272949, "step": 51090 }, { "epoch": 0.412485974669648, "grad_norm": 0.9852213859558105, "learning_rate": 1.1758034942610439e-05, "loss": 2.501441764831543, "step": 51100 }, { "epoch": 0.412566695995415, "grad_norm": 0.7067984938621521, "learning_rate": 1.1756419472872225e-05, "loss": 3.019563102722168, "step": 51110 }, { "epoch": 0.4126474173211821, "grad_norm": 0.8361302614212036, "learning_rate": 1.1754804003134012e-05, "loss": 3.1204452514648438, "step": 51120 }, { "epoch": 0.4127281386469491, "grad_norm": 0.9000462889671326, "learning_rate": 1.1753188533395798e-05, "loss": 3.186577796936035, "step": 51130 }, { "epoch": 0.4128088599727162, "grad_norm": 0.710760772228241, "learning_rate": 1.1751573063657586e-05, "loss": 2.5194778442382812, "step": 51140 }, { "epoch": 0.41288958129848324, "grad_norm": 0.9511508345603943, "learning_rate": 1.1749957593919372e-05, "loss": 3.15944881439209, "step": 51150 }, { "epoch": 0.4129703026242503, "grad_norm": 0.7993541955947876, "learning_rate": 1.174834212418116e-05, "loss": 2.563212585449219, "step": 51160 }, { "epoch": 0.41305102395001736, "grad_norm": 0.8728112578392029, "learning_rate": 1.1746726654442946e-05, "loss": 2.629275321960449, "step": 51170 }, { "epoch": 0.4131317452757844, "grad_norm": 1.024481177330017, "learning_rate": 1.1745111184704733e-05, "loss": 3.0446935653686524, "step": 51180 }, { "epoch": 0.4132124666015515, "grad_norm": 0.722906768321991, "learning_rate": 1.174349571496652e-05, "loss": 3.2227104187011717, "step": 51190 }, { "epoch": 0.4132931879273185, "grad_norm": 0.7083399295806885, "learning_rate": 1.1741880245228307e-05, "loss": 3.5590888977050783, "step": 51200 }, { "epoch": 0.4133739092530856, "grad_norm": 0.6029816269874573, "learning_rate": 1.1740264775490093e-05, "loss": 3.160915565490723, "step": 51210 }, { "epoch": 0.4134546305788526, "grad_norm": 1.0852969884872437, "learning_rate": 1.173864930575188e-05, "loss": 3.322541046142578, "step": 51220 }, { "epoch": 0.4135353519046197, "grad_norm": 0.9636967778205872, "learning_rate": 1.1737033836013667e-05, "loss": 2.817468452453613, "step": 51230 }, { "epoch": 0.41361607323038674, "grad_norm": 0.8648326396942139, "learning_rate": 1.1735418366275455e-05, "loss": 2.7170440673828127, "step": 51240 }, { "epoch": 0.41369679455615377, "grad_norm": 0.7276325225830078, "learning_rate": 1.173380289653724e-05, "loss": 3.120686721801758, "step": 51250 }, { "epoch": 0.41377751588192085, "grad_norm": 0.7236469984054565, "learning_rate": 1.1732187426799028e-05, "loss": 3.032881164550781, "step": 51260 }, { "epoch": 0.4138582372076879, "grad_norm": 0.568011999130249, "learning_rate": 1.1730571957060814e-05, "loss": 3.268320083618164, "step": 51270 }, { "epoch": 0.41393895853345497, "grad_norm": 1.583770513534546, "learning_rate": 1.1728956487322602e-05, "loss": 2.9203117370605467, "step": 51280 }, { "epoch": 0.414019679859222, "grad_norm": 1.047226905822754, "learning_rate": 1.1727341017584388e-05, "loss": 2.601306343078613, "step": 51290 }, { "epoch": 0.4141004011849891, "grad_norm": 1.0625923871994019, "learning_rate": 1.1725725547846176e-05, "loss": 3.1824506759643554, "step": 51300 }, { "epoch": 0.4141811225107561, "grad_norm": 0.7376335859298706, "learning_rate": 1.1724110078107962e-05, "loss": 2.8888923645019533, "step": 51310 }, { "epoch": 0.41426184383652315, "grad_norm": 1.4951084852218628, "learning_rate": 1.172249460836975e-05, "loss": 2.6207159042358397, "step": 51320 }, { "epoch": 0.41434256516229023, "grad_norm": 1.0272952318191528, "learning_rate": 1.1720879138631535e-05, "loss": 2.562793731689453, "step": 51330 }, { "epoch": 0.41442328648805726, "grad_norm": 1.8627698421478271, "learning_rate": 1.1719263668893323e-05, "loss": 3.2562889099121093, "step": 51340 }, { "epoch": 0.41450400781382435, "grad_norm": 0.8452377915382385, "learning_rate": 1.1717648199155109e-05, "loss": 2.6517763137817383, "step": 51350 }, { "epoch": 0.4145847291395914, "grad_norm": 0.7125350832939148, "learning_rate": 1.1716032729416897e-05, "loss": 2.7403127670288088, "step": 51360 }, { "epoch": 0.41466545046535846, "grad_norm": 0.7333825826644897, "learning_rate": 1.1714417259678683e-05, "loss": 3.0346073150634765, "step": 51370 }, { "epoch": 0.4147461717911255, "grad_norm": 0.7823072671890259, "learning_rate": 1.171280178994047e-05, "loss": 2.873042106628418, "step": 51380 }, { "epoch": 0.4148268931168925, "grad_norm": 1.1912251710891724, "learning_rate": 1.1711186320202256e-05, "loss": 3.216444396972656, "step": 51390 }, { "epoch": 0.4149076144426596, "grad_norm": 0.8359587788581848, "learning_rate": 1.1709570850464044e-05, "loss": 2.7933467864990233, "step": 51400 }, { "epoch": 0.41498833576842664, "grad_norm": 0.7635625004768372, "learning_rate": 1.170795538072583e-05, "loss": 2.514461708068848, "step": 51410 }, { "epoch": 0.4150690570941937, "grad_norm": 0.6165297031402588, "learning_rate": 1.1706339910987618e-05, "loss": 2.921297073364258, "step": 51420 }, { "epoch": 0.41514977841996076, "grad_norm": 1.0350743532180786, "learning_rate": 1.1704724441249404e-05, "loss": 2.7574132919311523, "step": 51430 }, { "epoch": 0.41523049974572784, "grad_norm": 0.8378244638442993, "learning_rate": 1.1703108971511193e-05, "loss": 2.4653953552246093, "step": 51440 }, { "epoch": 0.4153112210714949, "grad_norm": 0.6854841709136963, "learning_rate": 1.1701493501772977e-05, "loss": 2.6060142517089844, "step": 51450 }, { "epoch": 0.41539194239726196, "grad_norm": 0.7214520573616028, "learning_rate": 1.1699878032034767e-05, "loss": 2.955513763427734, "step": 51460 }, { "epoch": 0.415472663723029, "grad_norm": 0.9859307408332825, "learning_rate": 1.1698262562296551e-05, "loss": 2.670937347412109, "step": 51470 }, { "epoch": 0.415553385048796, "grad_norm": 0.8627501130104065, "learning_rate": 1.169664709255834e-05, "loss": 3.1180841445922853, "step": 51480 }, { "epoch": 0.4156341063745631, "grad_norm": 0.6481920480728149, "learning_rate": 1.1695031622820125e-05, "loss": 2.7110418319702148, "step": 51490 }, { "epoch": 0.41571482770033014, "grad_norm": 1.1872706413269043, "learning_rate": 1.1693416153081914e-05, "loss": 2.893813133239746, "step": 51500 }, { "epoch": 0.4157955490260972, "grad_norm": 1.1794430017471313, "learning_rate": 1.1691800683343699e-05, "loss": 2.467612648010254, "step": 51510 }, { "epoch": 0.41587627035186425, "grad_norm": 1.2043386697769165, "learning_rate": 1.1690185213605488e-05, "loss": 2.884860610961914, "step": 51520 }, { "epoch": 0.41595699167763134, "grad_norm": 0.7361574769020081, "learning_rate": 1.1688569743867272e-05, "loss": 2.821905326843262, "step": 51530 }, { "epoch": 0.41603771300339837, "grad_norm": 0.9511124491691589, "learning_rate": 1.1686954274129062e-05, "loss": 2.709084892272949, "step": 51540 }, { "epoch": 0.4161184343291654, "grad_norm": 1.0507327318191528, "learning_rate": 1.1685338804390846e-05, "loss": 2.922606658935547, "step": 51550 }, { "epoch": 0.4161991556549325, "grad_norm": 0.7480915188789368, "learning_rate": 1.1683723334652635e-05, "loss": 2.7415185928344727, "step": 51560 }, { "epoch": 0.4162798769806995, "grad_norm": 1.0007436275482178, "learning_rate": 1.168210786491442e-05, "loss": 3.2534763336181642, "step": 51570 }, { "epoch": 0.4163605983064666, "grad_norm": 0.9707320928573608, "learning_rate": 1.1680492395176209e-05, "loss": 3.0938844680786133, "step": 51580 }, { "epoch": 0.41644131963223363, "grad_norm": 0.8278511166572571, "learning_rate": 1.1678876925437995e-05, "loss": 2.899579620361328, "step": 51590 }, { "epoch": 0.4165220409580007, "grad_norm": 1.4652239084243774, "learning_rate": 1.1677261455699783e-05, "loss": 3.1582212448120117, "step": 51600 }, { "epoch": 0.41660276228376775, "grad_norm": 0.6003676056861877, "learning_rate": 1.1675645985961569e-05, "loss": 3.6242347717285157, "step": 51610 }, { "epoch": 0.4166834836095348, "grad_norm": 1.0651248693466187, "learning_rate": 1.1674030516223356e-05, "loss": 2.984124183654785, "step": 51620 }, { "epoch": 0.41676420493530186, "grad_norm": 0.7255059480667114, "learning_rate": 1.1672415046485142e-05, "loss": 3.1512430191040037, "step": 51630 }, { "epoch": 0.4168449262610689, "grad_norm": 1.747938871383667, "learning_rate": 1.167079957674693e-05, "loss": 2.9088823318481447, "step": 51640 }, { "epoch": 0.416925647586836, "grad_norm": 0.8069676160812378, "learning_rate": 1.1669184107008718e-05, "loss": 2.8300317764282226, "step": 51650 }, { "epoch": 0.417006368912603, "grad_norm": 1.2926164865493774, "learning_rate": 1.1667568637270504e-05, "loss": 2.5750951766967773, "step": 51660 }, { "epoch": 0.4170870902383701, "grad_norm": 0.8932434320449829, "learning_rate": 1.1665953167532291e-05, "loss": 2.504623794555664, "step": 51670 }, { "epoch": 0.4171678115641371, "grad_norm": 0.814162015914917, "learning_rate": 1.1664337697794077e-05, "loss": 2.692332458496094, "step": 51680 }, { "epoch": 0.4172485328899042, "grad_norm": 0.86920166015625, "learning_rate": 1.1662722228055865e-05, "loss": 3.070794105529785, "step": 51690 }, { "epoch": 0.41732925421567124, "grad_norm": 0.9652422666549683, "learning_rate": 1.1661106758317651e-05, "loss": 3.0413091659545897, "step": 51700 }, { "epoch": 0.41740997554143827, "grad_norm": 0.692730724811554, "learning_rate": 1.1659491288579439e-05, "loss": 2.586447334289551, "step": 51710 }, { "epoch": 0.41749069686720536, "grad_norm": 0.6392233967781067, "learning_rate": 1.1657875818841225e-05, "loss": 2.7568632125854493, "step": 51720 }, { "epoch": 0.4175714181929724, "grad_norm": 0.9549559950828552, "learning_rate": 1.1656260349103012e-05, "loss": 3.0230640411376952, "step": 51730 }, { "epoch": 0.4176521395187395, "grad_norm": 1.1439448595046997, "learning_rate": 1.1654644879364798e-05, "loss": 2.842207908630371, "step": 51740 }, { "epoch": 0.4177328608445065, "grad_norm": 0.5993251800537109, "learning_rate": 1.1653029409626586e-05, "loss": 2.813892936706543, "step": 51750 }, { "epoch": 0.4178135821702736, "grad_norm": 1.0294151306152344, "learning_rate": 1.1651413939888372e-05, "loss": 2.7174556732177733, "step": 51760 }, { "epoch": 0.4178943034960406, "grad_norm": 1.21659255027771, "learning_rate": 1.164979847015016e-05, "loss": 3.282621383666992, "step": 51770 }, { "epoch": 0.41797502482180765, "grad_norm": 1.1840475797653198, "learning_rate": 1.1648183000411946e-05, "loss": 3.0166448593139648, "step": 51780 }, { "epoch": 0.41805574614757474, "grad_norm": 0.6219843626022339, "learning_rate": 1.1646567530673734e-05, "loss": 3.3380313873291017, "step": 51790 }, { "epoch": 0.41813646747334177, "grad_norm": 0.9495280385017395, "learning_rate": 1.164495206093552e-05, "loss": 2.969356918334961, "step": 51800 }, { "epoch": 0.41821718879910885, "grad_norm": 0.6728907823562622, "learning_rate": 1.1643336591197307e-05, "loss": 2.4648141860961914, "step": 51810 }, { "epoch": 0.4182979101248759, "grad_norm": 0.7266985774040222, "learning_rate": 1.1641721121459093e-05, "loss": 2.742276191711426, "step": 51820 }, { "epoch": 0.41837863145064297, "grad_norm": 1.9046128988265991, "learning_rate": 1.1640105651720881e-05, "loss": 3.1800359725952148, "step": 51830 }, { "epoch": 0.41845935277641, "grad_norm": 1.4230797290802002, "learning_rate": 1.1638490181982667e-05, "loss": 3.0163015365600585, "step": 51840 }, { "epoch": 0.41854007410217703, "grad_norm": 0.9980233311653137, "learning_rate": 1.1636874712244455e-05, "loss": 2.5234413146972656, "step": 51850 }, { "epoch": 0.4186207954279441, "grad_norm": 0.8825963735580444, "learning_rate": 1.163525924250624e-05, "loss": 3.073794937133789, "step": 51860 }, { "epoch": 0.41870151675371114, "grad_norm": 0.7064600586891174, "learning_rate": 1.1633643772768028e-05, "loss": 3.0746109008789064, "step": 51870 }, { "epoch": 0.41878223807947823, "grad_norm": 1.3666038513183594, "learning_rate": 1.1632028303029814e-05, "loss": 3.0409130096435546, "step": 51880 }, { "epoch": 0.41886295940524526, "grad_norm": 0.7811556458473206, "learning_rate": 1.1630412833291602e-05, "loss": 2.9007457733154296, "step": 51890 }, { "epoch": 0.41894368073101235, "grad_norm": 0.6123215556144714, "learning_rate": 1.1628797363553388e-05, "loss": 2.521157646179199, "step": 51900 }, { "epoch": 0.4190244020567794, "grad_norm": 0.9069546461105347, "learning_rate": 1.1627181893815176e-05, "loss": 2.682036781311035, "step": 51910 }, { "epoch": 0.4191051233825464, "grad_norm": 2.3015527725219727, "learning_rate": 1.1625566424076962e-05, "loss": 2.835835075378418, "step": 51920 }, { "epoch": 0.4191858447083135, "grad_norm": 1.5143789052963257, "learning_rate": 1.162395095433875e-05, "loss": 2.8998645782470702, "step": 51930 }, { "epoch": 0.4192665660340805, "grad_norm": 1.510958194732666, "learning_rate": 1.1622335484600535e-05, "loss": 3.2158267974853514, "step": 51940 }, { "epoch": 0.4193472873598476, "grad_norm": 0.8040833473205566, "learning_rate": 1.1620720014862323e-05, "loss": 2.680657958984375, "step": 51950 }, { "epoch": 0.41942800868561464, "grad_norm": 0.8565717339515686, "learning_rate": 1.1619104545124109e-05, "loss": 2.4232719421386717, "step": 51960 }, { "epoch": 0.4195087300113817, "grad_norm": 0.7811546921730042, "learning_rate": 1.1617489075385897e-05, "loss": 3.130076789855957, "step": 51970 }, { "epoch": 0.41958945133714876, "grad_norm": 0.9807952642440796, "learning_rate": 1.1615873605647683e-05, "loss": 2.752449607849121, "step": 51980 }, { "epoch": 0.41967017266291584, "grad_norm": 1.209867238998413, "learning_rate": 1.161425813590947e-05, "loss": 2.706551170349121, "step": 51990 }, { "epoch": 0.41975089398868287, "grad_norm": 1.081117868423462, "learning_rate": 1.1612642666171256e-05, "loss": 2.498819923400879, "step": 52000 }, { "epoch": 0.4198316153144499, "grad_norm": 1.0594204664230347, "learning_rate": 1.1611027196433044e-05, "loss": 2.4294355392456053, "step": 52010 }, { "epoch": 0.419912336640217, "grad_norm": 0.7114371657371521, "learning_rate": 1.160941172669483e-05, "loss": 3.2954479217529298, "step": 52020 }, { "epoch": 0.419993057965984, "grad_norm": 1.3114204406738281, "learning_rate": 1.1607796256956618e-05, "loss": 3.3954513549804686, "step": 52030 }, { "epoch": 0.4200737792917511, "grad_norm": 0.8875061869621277, "learning_rate": 1.1606180787218404e-05, "loss": 2.749618339538574, "step": 52040 }, { "epoch": 0.42015450061751813, "grad_norm": 1.0748035907745361, "learning_rate": 1.1604565317480192e-05, "loss": 3.179495429992676, "step": 52050 }, { "epoch": 0.4202352219432852, "grad_norm": 0.8713651299476624, "learning_rate": 1.1602949847741978e-05, "loss": 2.7431312561035157, "step": 52060 }, { "epoch": 0.42031594326905225, "grad_norm": 1.430762767791748, "learning_rate": 1.1601334378003765e-05, "loss": 3.3151065826416017, "step": 52070 }, { "epoch": 0.4203966645948193, "grad_norm": 0.8343519568443298, "learning_rate": 1.1599718908265551e-05, "loss": 3.3086132049560546, "step": 52080 }, { "epoch": 0.42047738592058637, "grad_norm": 0.9420333504676819, "learning_rate": 1.1598103438527339e-05, "loss": 2.8497035980224608, "step": 52090 }, { "epoch": 0.4205581072463534, "grad_norm": 1.4681458473205566, "learning_rate": 1.1596487968789125e-05, "loss": 2.9273038864135743, "step": 52100 }, { "epoch": 0.4206388285721205, "grad_norm": 0.9480681419372559, "learning_rate": 1.1594872499050913e-05, "loss": 3.077292633056641, "step": 52110 }, { "epoch": 0.4207195498978875, "grad_norm": 0.9098886251449585, "learning_rate": 1.1593257029312699e-05, "loss": 2.860546875, "step": 52120 }, { "epoch": 0.4208002712236546, "grad_norm": 0.6593102216720581, "learning_rate": 1.1591641559574486e-05, "loss": 2.7124330520629885, "step": 52130 }, { "epoch": 0.42088099254942163, "grad_norm": 0.8589145541191101, "learning_rate": 1.1590026089836272e-05, "loss": 2.984981727600098, "step": 52140 }, { "epoch": 0.42096171387518866, "grad_norm": 0.8085398077964783, "learning_rate": 1.158841062009806e-05, "loss": 2.728211212158203, "step": 52150 }, { "epoch": 0.42104243520095574, "grad_norm": 0.5893798470497131, "learning_rate": 1.1586795150359846e-05, "loss": 2.841893196105957, "step": 52160 }, { "epoch": 0.4211231565267228, "grad_norm": 0.831429123878479, "learning_rate": 1.1585179680621634e-05, "loss": 2.6369951248168944, "step": 52170 }, { "epoch": 0.42120387785248986, "grad_norm": 0.9836119413375854, "learning_rate": 1.158356421088342e-05, "loss": 2.6628320693969725, "step": 52180 }, { "epoch": 0.4212845991782569, "grad_norm": 1.4358749389648438, "learning_rate": 1.1581948741145207e-05, "loss": 2.9067966461181642, "step": 52190 }, { "epoch": 0.421365320504024, "grad_norm": 0.9660671353340149, "learning_rate": 1.1580333271406993e-05, "loss": 2.9573492050170898, "step": 52200 }, { "epoch": 0.421446041829791, "grad_norm": 0.6192970871925354, "learning_rate": 1.1578717801668781e-05, "loss": 3.0193096160888673, "step": 52210 }, { "epoch": 0.4215267631555581, "grad_norm": 0.7550476789474487, "learning_rate": 1.1577102331930567e-05, "loss": 2.7470937728881837, "step": 52220 }, { "epoch": 0.4216074844813251, "grad_norm": 0.6288673281669617, "learning_rate": 1.1575486862192355e-05, "loss": 2.8132972717285156, "step": 52230 }, { "epoch": 0.42168820580709215, "grad_norm": 0.8277441263198853, "learning_rate": 1.157387139245414e-05, "loss": 2.6198728561401365, "step": 52240 }, { "epoch": 0.42176892713285924, "grad_norm": 1.263850212097168, "learning_rate": 1.1572255922715928e-05, "loss": 3.174464988708496, "step": 52250 }, { "epoch": 0.42184964845862627, "grad_norm": 0.6444544196128845, "learning_rate": 1.1570640452977714e-05, "loss": 2.6831792831420898, "step": 52260 }, { "epoch": 0.42193036978439336, "grad_norm": 0.8613671064376831, "learning_rate": 1.1569024983239502e-05, "loss": 3.2153244018554688, "step": 52270 }, { "epoch": 0.4220110911101604, "grad_norm": 0.8674046993255615, "learning_rate": 1.1567409513501288e-05, "loss": 2.5303604125976564, "step": 52280 }, { "epoch": 0.42209181243592747, "grad_norm": 1.1462663412094116, "learning_rate": 1.1565794043763076e-05, "loss": 2.949458885192871, "step": 52290 }, { "epoch": 0.4221725337616945, "grad_norm": 1.0687428712844849, "learning_rate": 1.1564178574024862e-05, "loss": 2.904986572265625, "step": 52300 }, { "epoch": 0.42225325508746153, "grad_norm": 1.3458051681518555, "learning_rate": 1.1562563104286651e-05, "loss": 2.8560598373413084, "step": 52310 }, { "epoch": 0.4223339764132286, "grad_norm": 0.7700908184051514, "learning_rate": 1.1560947634548436e-05, "loss": 2.6455429077148436, "step": 52320 }, { "epoch": 0.42241469773899565, "grad_norm": 1.0389381647109985, "learning_rate": 1.1559332164810225e-05, "loss": 3.114794921875, "step": 52330 }, { "epoch": 0.42249541906476273, "grad_norm": 0.767675518989563, "learning_rate": 1.155771669507201e-05, "loss": 2.70128173828125, "step": 52340 }, { "epoch": 0.42257614039052976, "grad_norm": 0.6341813206672668, "learning_rate": 1.1556101225333799e-05, "loss": 2.7208417892456054, "step": 52350 }, { "epoch": 0.42265686171629685, "grad_norm": 1.0954216718673706, "learning_rate": 1.1554485755595583e-05, "loss": 2.943169593811035, "step": 52360 }, { "epoch": 0.4227375830420639, "grad_norm": 0.7147678136825562, "learning_rate": 1.1552870285857372e-05, "loss": 2.675938034057617, "step": 52370 }, { "epoch": 0.4228183043678309, "grad_norm": 0.6861106157302856, "learning_rate": 1.1551254816119157e-05, "loss": 2.6525611877441406, "step": 52380 }, { "epoch": 0.422899025693598, "grad_norm": 0.8817891478538513, "learning_rate": 1.1549639346380946e-05, "loss": 3.4449321746826174, "step": 52390 }, { "epoch": 0.422979747019365, "grad_norm": 1.0287563800811768, "learning_rate": 1.154802387664273e-05, "loss": 2.800430488586426, "step": 52400 }, { "epoch": 0.4230604683451321, "grad_norm": 1.370529294013977, "learning_rate": 1.154640840690452e-05, "loss": 3.2158634185791017, "step": 52410 }, { "epoch": 0.42314118967089914, "grad_norm": 0.801335334777832, "learning_rate": 1.1544792937166304e-05, "loss": 3.1259719848632814, "step": 52420 }, { "epoch": 0.42322191099666623, "grad_norm": 1.088631510734558, "learning_rate": 1.1543177467428093e-05, "loss": 2.8324615478515627, "step": 52430 }, { "epoch": 0.42330263232243326, "grad_norm": 0.8937702178955078, "learning_rate": 1.1541561997689878e-05, "loss": 2.5746763229370115, "step": 52440 }, { "epoch": 0.42338335364820034, "grad_norm": 0.6335399746894836, "learning_rate": 1.1539946527951667e-05, "loss": 2.656805419921875, "step": 52450 }, { "epoch": 0.4234640749739674, "grad_norm": 1.291532039642334, "learning_rate": 1.1538331058213451e-05, "loss": 3.427129364013672, "step": 52460 }, { "epoch": 0.4235447962997344, "grad_norm": 0.5858315229415894, "learning_rate": 1.153671558847524e-05, "loss": 2.9911998748779296, "step": 52470 }, { "epoch": 0.4236255176255015, "grad_norm": 1.4906370639801025, "learning_rate": 1.1535100118737027e-05, "loss": 3.1752035140991213, "step": 52480 }, { "epoch": 0.4237062389512685, "grad_norm": 1.0209976434707642, "learning_rate": 1.1533484648998814e-05, "loss": 2.801957893371582, "step": 52490 }, { "epoch": 0.4237869602770356, "grad_norm": 0.7533528804779053, "learning_rate": 1.15318691792606e-05, "loss": 3.1426244735717774, "step": 52500 }, { "epoch": 0.42386768160280264, "grad_norm": 0.8464175462722778, "learning_rate": 1.1530253709522388e-05, "loss": 3.2174102783203127, "step": 52510 }, { "epoch": 0.4239484029285697, "grad_norm": 0.8571789860725403, "learning_rate": 1.1528638239784174e-05, "loss": 2.5853094100952148, "step": 52520 }, { "epoch": 0.42402912425433675, "grad_norm": 1.1198245286941528, "learning_rate": 1.1527022770045962e-05, "loss": 2.699977493286133, "step": 52530 }, { "epoch": 0.4241098455801038, "grad_norm": 1.0844143629074097, "learning_rate": 1.1525407300307748e-05, "loss": 2.984136390686035, "step": 52540 }, { "epoch": 0.42419056690587087, "grad_norm": 1.1944866180419922, "learning_rate": 1.1523791830569536e-05, "loss": 2.800543785095215, "step": 52550 }, { "epoch": 0.4242712882316379, "grad_norm": 0.8139249086380005, "learning_rate": 1.1522176360831322e-05, "loss": 2.8683732986450194, "step": 52560 }, { "epoch": 0.424352009557405, "grad_norm": 0.9729796051979065, "learning_rate": 1.152056089109311e-05, "loss": 2.836868667602539, "step": 52570 }, { "epoch": 0.424432730883172, "grad_norm": 0.8390598297119141, "learning_rate": 1.1518945421354895e-05, "loss": 2.4431318283081054, "step": 52580 }, { "epoch": 0.4245134522089391, "grad_norm": 0.46602514386177063, "learning_rate": 1.1517329951616683e-05, "loss": 2.792426872253418, "step": 52590 }, { "epoch": 0.42459417353470613, "grad_norm": 0.9934483766555786, "learning_rate": 1.1515714481878469e-05, "loss": 3.3621498107910157, "step": 52600 }, { "epoch": 0.42467489486047316, "grad_norm": 1.0522459745407104, "learning_rate": 1.1514099012140257e-05, "loss": 2.8508136749267576, "step": 52610 }, { "epoch": 0.42475561618624025, "grad_norm": 1.0497173070907593, "learning_rate": 1.1512483542402043e-05, "loss": 3.053662872314453, "step": 52620 }, { "epoch": 0.4248363375120073, "grad_norm": 0.7707222104072571, "learning_rate": 1.151086807266383e-05, "loss": 2.8970458984375, "step": 52630 }, { "epoch": 0.42491705883777436, "grad_norm": 1.1696912050247192, "learning_rate": 1.1509252602925616e-05, "loss": 2.936478042602539, "step": 52640 }, { "epoch": 0.4249977801635414, "grad_norm": 0.721519947052002, "learning_rate": 1.1507637133187404e-05, "loss": 2.867885398864746, "step": 52650 }, { "epoch": 0.4250785014893085, "grad_norm": 1.362553358078003, "learning_rate": 1.150602166344919e-05, "loss": 2.7816564559936525, "step": 52660 }, { "epoch": 0.4251592228150755, "grad_norm": 0.7294312119483948, "learning_rate": 1.1504406193710978e-05, "loss": 2.748587989807129, "step": 52670 }, { "epoch": 0.4252399441408426, "grad_norm": 0.8481548428535461, "learning_rate": 1.1502790723972764e-05, "loss": 2.85473518371582, "step": 52680 }, { "epoch": 0.4253206654666096, "grad_norm": 1.1052467823028564, "learning_rate": 1.1501175254234551e-05, "loss": 2.7485742568969727, "step": 52690 }, { "epoch": 0.42540138679237666, "grad_norm": 0.5819317698478699, "learning_rate": 1.1499559784496337e-05, "loss": 3.133609962463379, "step": 52700 }, { "epoch": 0.42548210811814374, "grad_norm": 0.9835370182991028, "learning_rate": 1.1497944314758125e-05, "loss": 2.7823381423950195, "step": 52710 }, { "epoch": 0.4255628294439108, "grad_norm": 0.6923284530639648, "learning_rate": 1.1496328845019911e-05, "loss": 2.7358072280883787, "step": 52720 }, { "epoch": 0.42564355076967786, "grad_norm": 1.5059465169906616, "learning_rate": 1.1494713375281699e-05, "loss": 2.6145160675048826, "step": 52730 }, { "epoch": 0.4257242720954449, "grad_norm": 0.9978411197662354, "learning_rate": 1.1493097905543485e-05, "loss": 3.0507699966430666, "step": 52740 }, { "epoch": 0.425804993421212, "grad_norm": 0.7838388681411743, "learning_rate": 1.1491482435805272e-05, "loss": 2.5820512771606445, "step": 52750 }, { "epoch": 0.425885714746979, "grad_norm": 1.1448347568511963, "learning_rate": 1.1489866966067058e-05, "loss": 2.7058221817016603, "step": 52760 }, { "epoch": 0.42596643607274604, "grad_norm": 0.964978039264679, "learning_rate": 1.1488251496328846e-05, "loss": 2.8014541625976563, "step": 52770 }, { "epoch": 0.4260471573985131, "grad_norm": 0.9197051525115967, "learning_rate": 1.1486636026590632e-05, "loss": 2.6584426879882814, "step": 52780 }, { "epoch": 0.42612787872428015, "grad_norm": 1.1982539892196655, "learning_rate": 1.148502055685242e-05, "loss": 3.355647659301758, "step": 52790 }, { "epoch": 0.42620860005004724, "grad_norm": 0.8676608800888062, "learning_rate": 1.1483405087114206e-05, "loss": 2.8423404693603516, "step": 52800 }, { "epoch": 0.42628932137581427, "grad_norm": 0.9508224725723267, "learning_rate": 1.1481789617375994e-05, "loss": 2.777783203125, "step": 52810 }, { "epoch": 0.42637004270158135, "grad_norm": 1.525904655456543, "learning_rate": 1.148017414763778e-05, "loss": 2.8291765213012696, "step": 52820 }, { "epoch": 0.4264507640273484, "grad_norm": 1.6608922481536865, "learning_rate": 1.1478558677899567e-05, "loss": 2.8555158615112304, "step": 52830 }, { "epoch": 0.4265314853531154, "grad_norm": 0.9323289394378662, "learning_rate": 1.1476943208161353e-05, "loss": 2.6464582443237306, "step": 52840 }, { "epoch": 0.4266122066788825, "grad_norm": 1.1139867305755615, "learning_rate": 1.1475327738423141e-05, "loss": 3.0273765563964843, "step": 52850 }, { "epoch": 0.42669292800464953, "grad_norm": 0.8897926211357117, "learning_rate": 1.1473712268684927e-05, "loss": 2.7951412200927734, "step": 52860 }, { "epoch": 0.4267736493304166, "grad_norm": 0.9897767305374146, "learning_rate": 1.1472096798946715e-05, "loss": 2.840167427062988, "step": 52870 }, { "epoch": 0.42685437065618365, "grad_norm": 0.7996095418930054, "learning_rate": 1.14704813292085e-05, "loss": 3.11988525390625, "step": 52880 }, { "epoch": 0.42693509198195073, "grad_norm": 0.9739488363265991, "learning_rate": 1.1468865859470288e-05, "loss": 3.085150146484375, "step": 52890 }, { "epoch": 0.42701581330771776, "grad_norm": 1.1027690172195435, "learning_rate": 1.1467250389732076e-05, "loss": 2.62481803894043, "step": 52900 }, { "epoch": 0.42709653463348485, "grad_norm": 1.1007119417190552, "learning_rate": 1.1465634919993862e-05, "loss": 2.734344482421875, "step": 52910 }, { "epoch": 0.4271772559592519, "grad_norm": 1.1318773031234741, "learning_rate": 1.146401945025565e-05, "loss": 3.0455253601074217, "step": 52920 }, { "epoch": 0.4272579772850189, "grad_norm": 1.2894961833953857, "learning_rate": 1.1462403980517436e-05, "loss": 3.2912624359130858, "step": 52930 }, { "epoch": 0.427338698610786, "grad_norm": 1.2869627475738525, "learning_rate": 1.1460788510779223e-05, "loss": 3.180306816101074, "step": 52940 }, { "epoch": 0.427419419936553, "grad_norm": 0.7720253467559814, "learning_rate": 1.145917304104101e-05, "loss": 2.6887449264526366, "step": 52950 }, { "epoch": 0.4275001412623201, "grad_norm": 1.0672264099121094, "learning_rate": 1.1457557571302797e-05, "loss": 2.751497268676758, "step": 52960 }, { "epoch": 0.42758086258808714, "grad_norm": 0.9935447573661804, "learning_rate": 1.1455942101564583e-05, "loss": 3.051324462890625, "step": 52970 }, { "epoch": 0.4276615839138542, "grad_norm": 0.8788437843322754, "learning_rate": 1.145432663182637e-05, "loss": 3.044902801513672, "step": 52980 }, { "epoch": 0.42774230523962126, "grad_norm": 1.3015918731689453, "learning_rate": 1.1452711162088157e-05, "loss": 3.059374237060547, "step": 52990 }, { "epoch": 0.4278230265653883, "grad_norm": 1.2419737577438354, "learning_rate": 1.1451095692349944e-05, "loss": 2.9539703369140624, "step": 53000 }, { "epoch": 0.4279037478911554, "grad_norm": 0.817730188369751, "learning_rate": 1.144948022261173e-05, "loss": 2.9186288833618166, "step": 53010 }, { "epoch": 0.4279844692169224, "grad_norm": 0.7001953125, "learning_rate": 1.1447864752873518e-05, "loss": 2.549344062805176, "step": 53020 }, { "epoch": 0.4280651905426895, "grad_norm": 1.698776125907898, "learning_rate": 1.1446249283135304e-05, "loss": 3.5304328918457033, "step": 53030 }, { "epoch": 0.4281459118684565, "grad_norm": 0.9969680309295654, "learning_rate": 1.1444633813397092e-05, "loss": 2.8285694122314453, "step": 53040 }, { "epoch": 0.4282266331942236, "grad_norm": 1.088599681854248, "learning_rate": 1.1443018343658878e-05, "loss": 2.8019203186035155, "step": 53050 }, { "epoch": 0.42830735451999064, "grad_norm": 0.7274779677391052, "learning_rate": 1.1441402873920666e-05, "loss": 2.557719612121582, "step": 53060 }, { "epoch": 0.42838807584575767, "grad_norm": 0.8416826128959656, "learning_rate": 1.1439787404182452e-05, "loss": 2.922469139099121, "step": 53070 }, { "epoch": 0.42846879717152475, "grad_norm": 1.160080909729004, "learning_rate": 1.143817193444424e-05, "loss": 2.9192226409912108, "step": 53080 }, { "epoch": 0.4285495184972918, "grad_norm": 1.028444766998291, "learning_rate": 1.1436556464706025e-05, "loss": 2.699098014831543, "step": 53090 }, { "epoch": 0.42863023982305887, "grad_norm": 0.6211695075035095, "learning_rate": 1.1434940994967813e-05, "loss": 2.538715934753418, "step": 53100 }, { "epoch": 0.4287109611488259, "grad_norm": 1.141937494277954, "learning_rate": 1.1433325525229599e-05, "loss": 2.7584829330444336, "step": 53110 }, { "epoch": 0.428791682474593, "grad_norm": 0.9490571022033691, "learning_rate": 1.1431710055491387e-05, "loss": 2.9554813385009764, "step": 53120 }, { "epoch": 0.42887240380036, "grad_norm": 1.2010204792022705, "learning_rate": 1.1430094585753173e-05, "loss": 2.9353315353393556, "step": 53130 }, { "epoch": 0.42895312512612704, "grad_norm": 0.9886471629142761, "learning_rate": 1.142847911601496e-05, "loss": 2.9655380249023438, "step": 53140 }, { "epoch": 0.42903384645189413, "grad_norm": 0.7439426183700562, "learning_rate": 1.1426863646276746e-05, "loss": 2.7638702392578125, "step": 53150 }, { "epoch": 0.42911456777766116, "grad_norm": 1.023970603942871, "learning_rate": 1.1425248176538534e-05, "loss": 2.5515377044677736, "step": 53160 }, { "epoch": 0.42919528910342825, "grad_norm": 1.1947554349899292, "learning_rate": 1.142363270680032e-05, "loss": 2.9563051223754884, "step": 53170 }, { "epoch": 0.4292760104291953, "grad_norm": 1.171837568283081, "learning_rate": 1.142201723706211e-05, "loss": 2.2695333480834963, "step": 53180 }, { "epoch": 0.42935673175496236, "grad_norm": 0.6565268635749817, "learning_rate": 1.1420401767323894e-05, "loss": 2.7483295440673827, "step": 53190 }, { "epoch": 0.4294374530807294, "grad_norm": 0.9260825514793396, "learning_rate": 1.1418786297585683e-05, "loss": 2.8800533294677733, "step": 53200 }, { "epoch": 0.4295181744064965, "grad_norm": 0.8192943334579468, "learning_rate": 1.1417170827847467e-05, "loss": 2.7174457550048827, "step": 53210 }, { "epoch": 0.4295988957322635, "grad_norm": 1.279220461845398, "learning_rate": 1.1415555358109257e-05, "loss": 3.3930385589599608, "step": 53220 }, { "epoch": 0.42967961705803054, "grad_norm": 1.2539818286895752, "learning_rate": 1.1413939888371041e-05, "loss": 2.527225303649902, "step": 53230 }, { "epoch": 0.4297603383837976, "grad_norm": 1.3068941831588745, "learning_rate": 1.141232441863283e-05, "loss": 2.7608335494995115, "step": 53240 }, { "epoch": 0.42984105970956465, "grad_norm": 0.6280202269554138, "learning_rate": 1.1410708948894615e-05, "loss": 3.13204402923584, "step": 53250 }, { "epoch": 0.42992178103533174, "grad_norm": 1.117465615272522, "learning_rate": 1.1409093479156404e-05, "loss": 3.308214569091797, "step": 53260 }, { "epoch": 0.43000250236109877, "grad_norm": 1.0498411655426025, "learning_rate": 1.1407478009418188e-05, "loss": 2.953590965270996, "step": 53270 }, { "epoch": 0.43008322368686586, "grad_norm": 0.8690893054008484, "learning_rate": 1.1405862539679978e-05, "loss": 2.7360382080078125, "step": 53280 }, { "epoch": 0.4301639450126329, "grad_norm": 1.1526607275009155, "learning_rate": 1.1404247069941762e-05, "loss": 2.983005142211914, "step": 53290 }, { "epoch": 0.4302446663383999, "grad_norm": 1.043663501739502, "learning_rate": 1.1402631600203552e-05, "loss": 2.7365737915039063, "step": 53300 }, { "epoch": 0.430325387664167, "grad_norm": 0.7940947413444519, "learning_rate": 1.1401016130465336e-05, "loss": 2.9685150146484376, "step": 53310 }, { "epoch": 0.43040610898993403, "grad_norm": 0.8018822073936462, "learning_rate": 1.1399400660727125e-05, "loss": 3.119671630859375, "step": 53320 }, { "epoch": 0.4304868303157011, "grad_norm": 1.0004310607910156, "learning_rate": 1.139778519098891e-05, "loss": 2.6162899017333983, "step": 53330 }, { "epoch": 0.43056755164146815, "grad_norm": 1.0901134014129639, "learning_rate": 1.1396169721250699e-05, "loss": 3.12374267578125, "step": 53340 }, { "epoch": 0.43064827296723523, "grad_norm": 1.0198848247528076, "learning_rate": 1.1394554251512485e-05, "loss": 2.874217414855957, "step": 53350 }, { "epoch": 0.43072899429300227, "grad_norm": 0.8316198587417603, "learning_rate": 1.1392938781774273e-05, "loss": 2.7300743103027343, "step": 53360 }, { "epoch": 0.4308097156187693, "grad_norm": 0.665136456489563, "learning_rate": 1.1391323312036059e-05, "loss": 2.853561782836914, "step": 53370 }, { "epoch": 0.4308904369445364, "grad_norm": 0.7694548964500427, "learning_rate": 1.1389707842297846e-05, "loss": 3.0757347106933595, "step": 53380 }, { "epoch": 0.4309711582703034, "grad_norm": 1.131862998008728, "learning_rate": 1.1388092372559632e-05, "loss": 2.9571847915649414, "step": 53390 }, { "epoch": 0.4310518795960705, "grad_norm": 0.7953837513923645, "learning_rate": 1.138647690282142e-05, "loss": 2.668475532531738, "step": 53400 }, { "epoch": 0.4311326009218375, "grad_norm": 1.0496525764465332, "learning_rate": 1.1384861433083206e-05, "loss": 2.9693660736083984, "step": 53410 }, { "epoch": 0.4312133222476046, "grad_norm": 0.5189477205276489, "learning_rate": 1.1383245963344994e-05, "loss": 3.1344505310058595, "step": 53420 }, { "epoch": 0.43129404357337164, "grad_norm": 0.9613099694252014, "learning_rate": 1.138163049360678e-05, "loss": 2.5984119415283202, "step": 53430 }, { "epoch": 0.43137476489913873, "grad_norm": 0.9136948585510254, "learning_rate": 1.1380015023868567e-05, "loss": 3.070731353759766, "step": 53440 }, { "epoch": 0.43145548622490576, "grad_norm": 0.7776517868041992, "learning_rate": 1.1378399554130353e-05, "loss": 2.670360565185547, "step": 53450 }, { "epoch": 0.4315362075506728, "grad_norm": 1.972927451133728, "learning_rate": 1.1376784084392141e-05, "loss": 2.8869157791137696, "step": 53460 }, { "epoch": 0.4316169288764399, "grad_norm": 1.3911367654800415, "learning_rate": 1.1375168614653927e-05, "loss": 2.494378662109375, "step": 53470 }, { "epoch": 0.4316976502022069, "grad_norm": 1.0237692594528198, "learning_rate": 1.1373553144915715e-05, "loss": 2.9021333694458007, "step": 53480 }, { "epoch": 0.431778371527974, "grad_norm": 0.910582959651947, "learning_rate": 1.13719376751775e-05, "loss": 2.787793731689453, "step": 53490 }, { "epoch": 0.431859092853741, "grad_norm": 0.9728123545646667, "learning_rate": 1.1370322205439288e-05, "loss": 2.5303199768066404, "step": 53500 }, { "epoch": 0.4319398141795081, "grad_norm": 1.6765085458755493, "learning_rate": 1.1368706735701074e-05, "loss": 2.5852256774902345, "step": 53510 }, { "epoch": 0.43202053550527514, "grad_norm": 0.9115766882896423, "learning_rate": 1.1367091265962862e-05, "loss": 2.6611955642700194, "step": 53520 }, { "epoch": 0.43210125683104217, "grad_norm": 1.4286860227584839, "learning_rate": 1.1365475796224648e-05, "loss": 2.5975515365600588, "step": 53530 }, { "epoch": 0.43218197815680925, "grad_norm": 0.9880300164222717, "learning_rate": 1.1363860326486436e-05, "loss": 2.307305908203125, "step": 53540 }, { "epoch": 0.4322626994825763, "grad_norm": 0.6301001310348511, "learning_rate": 1.1362244856748222e-05, "loss": 2.6206953048706056, "step": 53550 }, { "epoch": 0.43234342080834337, "grad_norm": 1.4848532676696777, "learning_rate": 1.136062938701001e-05, "loss": 2.9152658462524412, "step": 53560 }, { "epoch": 0.4324241421341104, "grad_norm": 1.1172434091567993, "learning_rate": 1.1359013917271795e-05, "loss": 2.971579933166504, "step": 53570 }, { "epoch": 0.4325048634598775, "grad_norm": 1.1312958002090454, "learning_rate": 1.1357398447533583e-05, "loss": 2.6989988327026366, "step": 53580 }, { "epoch": 0.4325855847856445, "grad_norm": 0.8861143589019775, "learning_rate": 1.135578297779537e-05, "loss": 3.5136627197265624, "step": 53590 }, { "epoch": 0.43266630611141155, "grad_norm": 0.8260459899902344, "learning_rate": 1.1354167508057157e-05, "loss": 2.5860467910766602, "step": 53600 }, { "epoch": 0.43274702743717863, "grad_norm": 0.9327852725982666, "learning_rate": 1.1352552038318943e-05, "loss": 2.5164779663085937, "step": 53610 }, { "epoch": 0.43282774876294566, "grad_norm": 0.9704729914665222, "learning_rate": 1.135093656858073e-05, "loss": 2.7024816513061523, "step": 53620 }, { "epoch": 0.43290847008871275, "grad_norm": 0.8975355625152588, "learning_rate": 1.1349321098842517e-05, "loss": 2.7321098327636717, "step": 53630 }, { "epoch": 0.4329891914144798, "grad_norm": 0.7787275910377502, "learning_rate": 1.1347705629104304e-05, "loss": 2.6653045654296874, "step": 53640 }, { "epoch": 0.43306991274024687, "grad_norm": 1.091557264328003, "learning_rate": 1.134609015936609e-05, "loss": 2.9220787048339845, "step": 53650 }, { "epoch": 0.4331506340660139, "grad_norm": 1.073819637298584, "learning_rate": 1.1344474689627878e-05, "loss": 2.9613256454467773, "step": 53660 }, { "epoch": 0.433231355391781, "grad_norm": 1.6681129932403564, "learning_rate": 1.1342859219889664e-05, "loss": 2.7495956420898438, "step": 53670 }, { "epoch": 0.433312076717548, "grad_norm": 0.9405099153518677, "learning_rate": 1.1341243750151452e-05, "loss": 2.3149709701538086, "step": 53680 }, { "epoch": 0.43339279804331504, "grad_norm": 0.7993596792221069, "learning_rate": 1.1339628280413238e-05, "loss": 2.588293266296387, "step": 53690 }, { "epoch": 0.4334735193690821, "grad_norm": 0.8015648126602173, "learning_rate": 1.1338012810675025e-05, "loss": 3.016244125366211, "step": 53700 }, { "epoch": 0.43355424069484916, "grad_norm": 1.8050522804260254, "learning_rate": 1.1336397340936811e-05, "loss": 3.031929588317871, "step": 53710 }, { "epoch": 0.43363496202061624, "grad_norm": 0.6138179898262024, "learning_rate": 1.1334781871198599e-05, "loss": 2.745590019226074, "step": 53720 }, { "epoch": 0.4337156833463833, "grad_norm": 1.1817314624786377, "learning_rate": 1.1333166401460385e-05, "loss": 3.6998939514160156, "step": 53730 }, { "epoch": 0.43379640467215036, "grad_norm": 1.1977887153625488, "learning_rate": 1.1331550931722173e-05, "loss": 2.94934139251709, "step": 53740 }, { "epoch": 0.4338771259979174, "grad_norm": 0.6390905380249023, "learning_rate": 1.1329935461983959e-05, "loss": 3.0500198364257813, "step": 53750 }, { "epoch": 0.4339578473236844, "grad_norm": 0.9643200039863586, "learning_rate": 1.1328319992245746e-05, "loss": 3.050265312194824, "step": 53760 }, { "epoch": 0.4340385686494515, "grad_norm": 1.0721230506896973, "learning_rate": 1.1326704522507532e-05, "loss": 2.9534669876098634, "step": 53770 }, { "epoch": 0.43411928997521854, "grad_norm": 1.2723472118377686, "learning_rate": 1.132508905276932e-05, "loss": 3.743092727661133, "step": 53780 }, { "epoch": 0.4342000113009856, "grad_norm": 0.6159273386001587, "learning_rate": 1.1323473583031106e-05, "loss": 2.51068058013916, "step": 53790 }, { "epoch": 0.43428073262675265, "grad_norm": 0.6857572793960571, "learning_rate": 1.1321858113292894e-05, "loss": 2.527111053466797, "step": 53800 }, { "epoch": 0.43436145395251974, "grad_norm": 1.91791832447052, "learning_rate": 1.132024264355468e-05, "loss": 2.811379814147949, "step": 53810 }, { "epoch": 0.43444217527828677, "grad_norm": 0.9565102458000183, "learning_rate": 1.1318627173816467e-05, "loss": 2.6073257446289064, "step": 53820 }, { "epoch": 0.4345228966040538, "grad_norm": 1.3832194805145264, "learning_rate": 1.1317011704078253e-05, "loss": 3.165982246398926, "step": 53830 }, { "epoch": 0.4346036179298209, "grad_norm": 0.7579542994499207, "learning_rate": 1.1315396234340041e-05, "loss": 2.5515953063964845, "step": 53840 }, { "epoch": 0.4346843392555879, "grad_norm": 1.4171490669250488, "learning_rate": 1.1313780764601827e-05, "loss": 2.6004684448242186, "step": 53850 }, { "epoch": 0.434765060581355, "grad_norm": 0.8864624500274658, "learning_rate": 1.1312165294863615e-05, "loss": 3.0111978530883787, "step": 53860 }, { "epoch": 0.43484578190712203, "grad_norm": 0.8907309174537659, "learning_rate": 1.1310549825125401e-05, "loss": 3.2980804443359375, "step": 53870 }, { "epoch": 0.4349265032328891, "grad_norm": 1.3814843893051147, "learning_rate": 1.1308934355387189e-05, "loss": 2.7982402801513673, "step": 53880 }, { "epoch": 0.43500722455865615, "grad_norm": 0.978254497051239, "learning_rate": 1.1307318885648975e-05, "loss": 2.9110797882080077, "step": 53890 }, { "epoch": 0.43508794588442323, "grad_norm": 0.7013258934020996, "learning_rate": 1.1305703415910762e-05, "loss": 2.574080467224121, "step": 53900 }, { "epoch": 0.43516866721019026, "grad_norm": 0.9038092494010925, "learning_rate": 1.1304087946172548e-05, "loss": 2.6663373947143554, "step": 53910 }, { "epoch": 0.4352493885359573, "grad_norm": 0.7734948396682739, "learning_rate": 1.1302472476434336e-05, "loss": 2.611236572265625, "step": 53920 }, { "epoch": 0.4353301098617244, "grad_norm": 1.0355150699615479, "learning_rate": 1.1300857006696122e-05, "loss": 3.1623825073242187, "step": 53930 }, { "epoch": 0.4354108311874914, "grad_norm": 0.9118136763572693, "learning_rate": 1.129924153695791e-05, "loss": 2.9954383850097654, "step": 53940 }, { "epoch": 0.4354915525132585, "grad_norm": 0.8672341704368591, "learning_rate": 1.1297626067219696e-05, "loss": 2.821781349182129, "step": 53950 }, { "epoch": 0.4355722738390255, "grad_norm": 1.2466893196105957, "learning_rate": 1.1296010597481483e-05, "loss": 2.992295837402344, "step": 53960 }, { "epoch": 0.4356529951647926, "grad_norm": 1.4892737865447998, "learning_rate": 1.129439512774327e-05, "loss": 2.59886474609375, "step": 53970 }, { "epoch": 0.43573371649055964, "grad_norm": 0.9639383554458618, "learning_rate": 1.1292779658005057e-05, "loss": 2.8096778869628904, "step": 53980 }, { "epoch": 0.43581443781632667, "grad_norm": 0.8031639456748962, "learning_rate": 1.1291164188266843e-05, "loss": 2.649763488769531, "step": 53990 }, { "epoch": 0.43589515914209376, "grad_norm": 0.8041160702705383, "learning_rate": 1.128954871852863e-05, "loss": 3.006892204284668, "step": 54000 }, { "epoch": 0.4359758804678608, "grad_norm": 1.0966399908065796, "learning_rate": 1.1287933248790417e-05, "loss": 3.305039978027344, "step": 54010 }, { "epoch": 0.4360566017936279, "grad_norm": 1.1012437343597412, "learning_rate": 1.1286317779052204e-05, "loss": 2.7248788833618165, "step": 54020 }, { "epoch": 0.4361373231193949, "grad_norm": 0.8461646437644958, "learning_rate": 1.128470230931399e-05, "loss": 2.5857097625732424, "step": 54030 }, { "epoch": 0.436218044445162, "grad_norm": 0.6851118206977844, "learning_rate": 1.1283086839575778e-05, "loss": 2.558818054199219, "step": 54040 }, { "epoch": 0.436298765770929, "grad_norm": 1.3281954526901245, "learning_rate": 1.1281471369837564e-05, "loss": 3.1060047149658203, "step": 54050 }, { "epoch": 0.43637948709669605, "grad_norm": 0.7540308237075806, "learning_rate": 1.1279855900099352e-05, "loss": 2.5486053466796874, "step": 54060 }, { "epoch": 0.43646020842246314, "grad_norm": 0.6990309953689575, "learning_rate": 1.1278240430361138e-05, "loss": 3.2104969024658203, "step": 54070 }, { "epoch": 0.43654092974823017, "grad_norm": 1.0469715595245361, "learning_rate": 1.1276624960622925e-05, "loss": 2.7101749420166015, "step": 54080 }, { "epoch": 0.43662165107399725, "grad_norm": 0.9478027820587158, "learning_rate": 1.1275009490884711e-05, "loss": 3.018759536743164, "step": 54090 }, { "epoch": 0.4367023723997643, "grad_norm": 0.8232092261314392, "learning_rate": 1.12733940211465e-05, "loss": 2.443604850769043, "step": 54100 }, { "epoch": 0.43678309372553137, "grad_norm": 0.792631208896637, "learning_rate": 1.1271778551408285e-05, "loss": 2.990594673156738, "step": 54110 }, { "epoch": 0.4368638150512984, "grad_norm": 0.7866932153701782, "learning_rate": 1.1270163081670073e-05, "loss": 3.1715366363525392, "step": 54120 }, { "epoch": 0.4369445363770655, "grad_norm": 1.0840997695922852, "learning_rate": 1.1268547611931859e-05, "loss": 2.812809181213379, "step": 54130 }, { "epoch": 0.4370252577028325, "grad_norm": 0.6094359755516052, "learning_rate": 1.1266932142193647e-05, "loss": 2.98238582611084, "step": 54140 }, { "epoch": 0.43710597902859954, "grad_norm": 1.4115267992019653, "learning_rate": 1.1265316672455436e-05, "loss": 2.5622173309326173, "step": 54150 }, { "epoch": 0.43718670035436663, "grad_norm": 0.6180609464645386, "learning_rate": 1.126370120271722e-05, "loss": 2.9116817474365235, "step": 54160 }, { "epoch": 0.43726742168013366, "grad_norm": 1.0715028047561646, "learning_rate": 1.126208573297901e-05, "loss": 3.0620737075805664, "step": 54170 }, { "epoch": 0.43734814300590075, "grad_norm": 1.2237399816513062, "learning_rate": 1.1260470263240794e-05, "loss": 2.6349910736083983, "step": 54180 }, { "epoch": 0.4374288643316678, "grad_norm": 0.9073886275291443, "learning_rate": 1.1258854793502583e-05, "loss": 2.4256465911865233, "step": 54190 }, { "epoch": 0.43750958565743486, "grad_norm": 0.726507306098938, "learning_rate": 1.1257239323764368e-05, "loss": 2.8700798034667967, "step": 54200 }, { "epoch": 0.4375903069832019, "grad_norm": 0.9678922295570374, "learning_rate": 1.1255623854026157e-05, "loss": 2.510556221008301, "step": 54210 }, { "epoch": 0.4376710283089689, "grad_norm": 0.9706964492797852, "learning_rate": 1.1254008384287943e-05, "loss": 2.6871664047241213, "step": 54220 }, { "epoch": 0.437751749634736, "grad_norm": 1.0630654096603394, "learning_rate": 1.125239291454973e-05, "loss": 2.779988670349121, "step": 54230 }, { "epoch": 0.43783247096050304, "grad_norm": 0.7785186171531677, "learning_rate": 1.1250777444811517e-05, "loss": 2.4723077774047852, "step": 54240 }, { "epoch": 0.4379131922862701, "grad_norm": 1.076715350151062, "learning_rate": 1.1249161975073304e-05, "loss": 2.6960906982421875, "step": 54250 }, { "epoch": 0.43799391361203716, "grad_norm": 0.7872045040130615, "learning_rate": 1.124754650533509e-05, "loss": 2.7395891189575194, "step": 54260 }, { "epoch": 0.43807463493780424, "grad_norm": 0.956192672252655, "learning_rate": 1.1245931035596878e-05, "loss": 3.069386672973633, "step": 54270 }, { "epoch": 0.43815535626357127, "grad_norm": 1.1269705295562744, "learning_rate": 1.1244315565858664e-05, "loss": 2.642266845703125, "step": 54280 }, { "epoch": 0.4382360775893383, "grad_norm": 0.81170654296875, "learning_rate": 1.1242700096120452e-05, "loss": 2.508964729309082, "step": 54290 }, { "epoch": 0.4383167989151054, "grad_norm": 0.9010921716690063, "learning_rate": 1.1241084626382238e-05, "loss": 2.789737319946289, "step": 54300 }, { "epoch": 0.4383975202408724, "grad_norm": 1.2714811563491821, "learning_rate": 1.1239469156644025e-05, "loss": 2.850949287414551, "step": 54310 }, { "epoch": 0.4384782415666395, "grad_norm": 1.0314356088638306, "learning_rate": 1.1237853686905811e-05, "loss": 2.686406707763672, "step": 54320 }, { "epoch": 0.43855896289240653, "grad_norm": 0.7320220470428467, "learning_rate": 1.1236238217167599e-05, "loss": 2.768514060974121, "step": 54330 }, { "epoch": 0.4386396842181736, "grad_norm": 1.2295466661453247, "learning_rate": 1.1234622747429385e-05, "loss": 2.661448860168457, "step": 54340 }, { "epoch": 0.43872040554394065, "grad_norm": 0.7743502855300903, "learning_rate": 1.1233007277691173e-05, "loss": 2.9188711166381838, "step": 54350 }, { "epoch": 0.4388011268697077, "grad_norm": 0.7920743227005005, "learning_rate": 1.1231391807952959e-05, "loss": 3.519997406005859, "step": 54360 }, { "epoch": 0.43888184819547477, "grad_norm": 4.46598482131958, "learning_rate": 1.1229776338214747e-05, "loss": 3.3473884582519533, "step": 54370 }, { "epoch": 0.4389625695212418, "grad_norm": 1.065757393836975, "learning_rate": 1.1228160868476533e-05, "loss": 2.6912139892578124, "step": 54380 }, { "epoch": 0.4390432908470089, "grad_norm": 1.190535545349121, "learning_rate": 1.122654539873832e-05, "loss": 3.139984703063965, "step": 54390 }, { "epoch": 0.4391240121727759, "grad_norm": 0.8486614227294922, "learning_rate": 1.1224929929000106e-05, "loss": 3.2019153594970704, "step": 54400 }, { "epoch": 0.439204733498543, "grad_norm": 1.1706286668777466, "learning_rate": 1.1223314459261894e-05, "loss": 2.7573551177978515, "step": 54410 }, { "epoch": 0.43928545482431003, "grad_norm": 0.6758977174758911, "learning_rate": 1.122169898952368e-05, "loss": 2.812437629699707, "step": 54420 }, { "epoch": 0.4393661761500771, "grad_norm": 1.4169365167617798, "learning_rate": 1.1220083519785468e-05, "loss": 2.8696834564208986, "step": 54430 }, { "epoch": 0.43944689747584414, "grad_norm": 0.8752833604812622, "learning_rate": 1.1218468050047254e-05, "loss": 2.9247055053710938, "step": 54440 }, { "epoch": 0.4395276188016112, "grad_norm": 0.9856658577919006, "learning_rate": 1.1216852580309041e-05, "loss": 2.481760025024414, "step": 54450 }, { "epoch": 0.43960834012737826, "grad_norm": 0.6983155608177185, "learning_rate": 1.1215237110570827e-05, "loss": 2.9204839706420898, "step": 54460 }, { "epoch": 0.4396890614531453, "grad_norm": 0.8823785185813904, "learning_rate": 1.1213621640832615e-05, "loss": 2.7627662658691405, "step": 54470 }, { "epoch": 0.4397697827789124, "grad_norm": 0.8162648677825928, "learning_rate": 1.1212006171094401e-05, "loss": 2.7423608779907225, "step": 54480 }, { "epoch": 0.4398505041046794, "grad_norm": 0.9189209938049316, "learning_rate": 1.1210390701356189e-05, "loss": 3.1562631607055662, "step": 54490 }, { "epoch": 0.4399312254304465, "grad_norm": 0.904869019985199, "learning_rate": 1.1208775231617975e-05, "loss": 2.8008724212646485, "step": 54500 }, { "epoch": 0.4400119467562135, "grad_norm": 1.0117945671081543, "learning_rate": 1.1207159761879762e-05, "loss": 3.1976547241210938, "step": 54510 }, { "epoch": 0.44009266808198055, "grad_norm": 0.9413555860519409, "learning_rate": 1.1205544292141548e-05, "loss": 2.668750190734863, "step": 54520 }, { "epoch": 0.44017338940774764, "grad_norm": 0.7901748418807983, "learning_rate": 1.1203928822403336e-05, "loss": 2.6803524017333986, "step": 54530 }, { "epoch": 0.44025411073351467, "grad_norm": 1.3329137563705444, "learning_rate": 1.1202313352665122e-05, "loss": 2.7332551956176756, "step": 54540 }, { "epoch": 0.44033483205928176, "grad_norm": 0.6923254132270813, "learning_rate": 1.120069788292691e-05, "loss": 2.571529006958008, "step": 54550 }, { "epoch": 0.4404155533850488, "grad_norm": 0.9839704632759094, "learning_rate": 1.1199082413188696e-05, "loss": 3.4149944305419924, "step": 54560 }, { "epoch": 0.44049627471081587, "grad_norm": 1.106704831123352, "learning_rate": 1.1197466943450483e-05, "loss": 3.3120113372802735, "step": 54570 }, { "epoch": 0.4405769960365829, "grad_norm": 0.7281959652900696, "learning_rate": 1.119585147371227e-05, "loss": 2.688308906555176, "step": 54580 }, { "epoch": 0.44065771736234993, "grad_norm": 0.9876053929328918, "learning_rate": 1.1194236003974057e-05, "loss": 2.9469573974609373, "step": 54590 }, { "epoch": 0.440738438688117, "grad_norm": 0.7823410034179688, "learning_rate": 1.1192620534235843e-05, "loss": 2.8928394317626953, "step": 54600 }, { "epoch": 0.44081916001388405, "grad_norm": 0.8727141618728638, "learning_rate": 1.119100506449763e-05, "loss": 2.6500085830688476, "step": 54610 }, { "epoch": 0.44089988133965113, "grad_norm": 0.9187812805175781, "learning_rate": 1.1189389594759417e-05, "loss": 3.147477149963379, "step": 54620 }, { "epoch": 0.44098060266541816, "grad_norm": 0.6524336338043213, "learning_rate": 1.1187774125021205e-05, "loss": 2.611418914794922, "step": 54630 }, { "epoch": 0.44106132399118525, "grad_norm": 0.8290688991546631, "learning_rate": 1.118615865528299e-05, "loss": 3.2458972930908203, "step": 54640 }, { "epoch": 0.4411420453169523, "grad_norm": 1.0985385179519653, "learning_rate": 1.1184543185544778e-05, "loss": 2.8783750534057617, "step": 54650 }, { "epoch": 0.44122276664271937, "grad_norm": 1.2797250747680664, "learning_rate": 1.1182927715806564e-05, "loss": 2.5291345596313475, "step": 54660 }, { "epoch": 0.4413034879684864, "grad_norm": 0.7351953387260437, "learning_rate": 1.1181312246068352e-05, "loss": 2.724662971496582, "step": 54670 }, { "epoch": 0.4413842092942534, "grad_norm": 0.5729951858520508, "learning_rate": 1.1179696776330138e-05, "loss": 2.7778194427490233, "step": 54680 }, { "epoch": 0.4414649306200205, "grad_norm": 1.1503976583480835, "learning_rate": 1.1178081306591926e-05, "loss": 2.5395246505737306, "step": 54690 }, { "epoch": 0.44154565194578754, "grad_norm": 1.1001980304718018, "learning_rate": 1.1176465836853712e-05, "loss": 2.6601686477661133, "step": 54700 }, { "epoch": 0.44162637327155463, "grad_norm": 1.0013213157653809, "learning_rate": 1.11748503671155e-05, "loss": 2.881320762634277, "step": 54710 }, { "epoch": 0.44170709459732166, "grad_norm": 0.7206723690032959, "learning_rate": 1.1173234897377285e-05, "loss": 3.1304025650024414, "step": 54720 }, { "epoch": 0.44178781592308874, "grad_norm": 1.0503472089767456, "learning_rate": 1.1171619427639073e-05, "loss": 2.674692916870117, "step": 54730 }, { "epoch": 0.4418685372488558, "grad_norm": 0.7593035697937012, "learning_rate": 1.1170003957900859e-05, "loss": 3.1750682830810546, "step": 54740 }, { "epoch": 0.4419492585746228, "grad_norm": 1.0033715963363647, "learning_rate": 1.1168388488162647e-05, "loss": 2.5553077697753905, "step": 54750 }, { "epoch": 0.4420299799003899, "grad_norm": 1.106543779373169, "learning_rate": 1.1166773018424433e-05, "loss": 3.313058853149414, "step": 54760 }, { "epoch": 0.4421107012261569, "grad_norm": 0.8952915072441101, "learning_rate": 1.116515754868622e-05, "loss": 2.9105628967285155, "step": 54770 }, { "epoch": 0.442191422551924, "grad_norm": 0.8511205315589905, "learning_rate": 1.1163542078948006e-05, "loss": 3.4775135040283205, "step": 54780 }, { "epoch": 0.44227214387769104, "grad_norm": 0.5997961163520813, "learning_rate": 1.1161926609209794e-05, "loss": 2.6317129135131836, "step": 54790 }, { "epoch": 0.4423528652034581, "grad_norm": 0.8192728161811829, "learning_rate": 1.116031113947158e-05, "loss": 3.014325714111328, "step": 54800 }, { "epoch": 0.44243358652922515, "grad_norm": 0.7750853896141052, "learning_rate": 1.1158695669733368e-05, "loss": 2.9160255432128905, "step": 54810 }, { "epoch": 0.4425143078549922, "grad_norm": 0.7050466537475586, "learning_rate": 1.1157080199995154e-05, "loss": 2.9940217971801757, "step": 54820 }, { "epoch": 0.44259502918075927, "grad_norm": 0.9013568758964539, "learning_rate": 1.1155464730256941e-05, "loss": 2.811108207702637, "step": 54830 }, { "epoch": 0.4426757505065263, "grad_norm": 1.035195231437683, "learning_rate": 1.1153849260518727e-05, "loss": 2.8204959869384765, "step": 54840 }, { "epoch": 0.4427564718322934, "grad_norm": 0.8588938117027283, "learning_rate": 1.1152233790780515e-05, "loss": 2.4476890563964844, "step": 54850 }, { "epoch": 0.4428371931580604, "grad_norm": 1.4805461168289185, "learning_rate": 1.1150618321042301e-05, "loss": 2.8413286209106445, "step": 54860 }, { "epoch": 0.4429179144838275, "grad_norm": 1.0594288110733032, "learning_rate": 1.1149002851304089e-05, "loss": 2.8388134002685548, "step": 54870 }, { "epoch": 0.44299863580959453, "grad_norm": 0.9749638438224792, "learning_rate": 1.1147387381565875e-05, "loss": 3.5721858978271483, "step": 54880 }, { "epoch": 0.4430793571353616, "grad_norm": 1.275991439819336, "learning_rate": 1.1145771911827663e-05, "loss": 3.2644439697265626, "step": 54890 }, { "epoch": 0.44316007846112865, "grad_norm": 1.1971595287322998, "learning_rate": 1.1144156442089449e-05, "loss": 3.014716911315918, "step": 54900 }, { "epoch": 0.4432407997868957, "grad_norm": 0.6988118290901184, "learning_rate": 1.1142540972351236e-05, "loss": 2.674402618408203, "step": 54910 }, { "epoch": 0.44332152111266276, "grad_norm": 0.8747650980949402, "learning_rate": 1.1140925502613022e-05, "loss": 2.8462011337280275, "step": 54920 }, { "epoch": 0.4434022424384298, "grad_norm": 1.0722683668136597, "learning_rate": 1.113931003287481e-05, "loss": 2.755872344970703, "step": 54930 }, { "epoch": 0.4434829637641969, "grad_norm": 1.3846691846847534, "learning_rate": 1.1137694563136596e-05, "loss": 3.232920837402344, "step": 54940 }, { "epoch": 0.4435636850899639, "grad_norm": 1.6106330156326294, "learning_rate": 1.1136079093398384e-05, "loss": 2.864302635192871, "step": 54950 }, { "epoch": 0.443644406415731, "grad_norm": 1.0197218656539917, "learning_rate": 1.113446362366017e-05, "loss": 2.429338073730469, "step": 54960 }, { "epoch": 0.443725127741498, "grad_norm": 1.2837170362472534, "learning_rate": 1.1132848153921957e-05, "loss": 2.739526557922363, "step": 54970 }, { "epoch": 0.44380584906726506, "grad_norm": 0.8062437772750854, "learning_rate": 1.1131232684183743e-05, "loss": 3.0755599975585937, "step": 54980 }, { "epoch": 0.44388657039303214, "grad_norm": 0.9626379609107971, "learning_rate": 1.1129617214445531e-05, "loss": 3.131260299682617, "step": 54990 }, { "epoch": 0.4439672917187992, "grad_norm": 1.2642674446105957, "learning_rate": 1.1128001744707317e-05, "loss": 2.8276556015014647, "step": 55000 }, { "epoch": 0.44404801304456626, "grad_norm": 1.052341103553772, "learning_rate": 1.1126386274969105e-05, "loss": 2.774621772766113, "step": 55010 }, { "epoch": 0.4441287343703333, "grad_norm": 0.6797616481781006, "learning_rate": 1.112477080523089e-05, "loss": 2.6193235397338865, "step": 55020 }, { "epoch": 0.4442094556961004, "grad_norm": 0.5486013889312744, "learning_rate": 1.1123155335492678e-05, "loss": 3.2164737701416017, "step": 55030 }, { "epoch": 0.4442901770218674, "grad_norm": 0.8739400506019592, "learning_rate": 1.1121539865754464e-05, "loss": 3.0859209060668946, "step": 55040 }, { "epoch": 0.44437089834763444, "grad_norm": 1.1099653244018555, "learning_rate": 1.1119924396016252e-05, "loss": 3.1922021865844727, "step": 55050 }, { "epoch": 0.4444516196734015, "grad_norm": 0.5758503675460815, "learning_rate": 1.1118308926278038e-05, "loss": 2.7044424057006835, "step": 55060 }, { "epoch": 0.44453234099916855, "grad_norm": 0.8768922686576843, "learning_rate": 1.1116693456539826e-05, "loss": 2.7502777099609377, "step": 55070 }, { "epoch": 0.44461306232493564, "grad_norm": 1.094679832458496, "learning_rate": 1.1115077986801612e-05, "loss": 2.901138687133789, "step": 55080 }, { "epoch": 0.44469378365070267, "grad_norm": 0.9161211252212524, "learning_rate": 1.1113462517063401e-05, "loss": 2.8769758224487303, "step": 55090 }, { "epoch": 0.44477450497646975, "grad_norm": 1.2462081909179688, "learning_rate": 1.1111847047325185e-05, "loss": 2.8833728790283204, "step": 55100 }, { "epoch": 0.4448552263022368, "grad_norm": 0.80546635389328, "learning_rate": 1.1110231577586975e-05, "loss": 3.0706855773925783, "step": 55110 }, { "epoch": 0.44493594762800387, "grad_norm": 1.057092547416687, "learning_rate": 1.1108616107848759e-05, "loss": 3.2494186401367187, "step": 55120 }, { "epoch": 0.4450166689537709, "grad_norm": 1.3230559825897217, "learning_rate": 1.1107000638110549e-05, "loss": 3.3413204193115233, "step": 55130 }, { "epoch": 0.44509739027953793, "grad_norm": 0.7617780566215515, "learning_rate": 1.1105385168372333e-05, "loss": 3.2491546630859376, "step": 55140 }, { "epoch": 0.445178111605305, "grad_norm": 1.2954871654510498, "learning_rate": 1.1103769698634122e-05, "loss": 3.1101823806762696, "step": 55150 }, { "epoch": 0.44525883293107205, "grad_norm": 0.6553506255149841, "learning_rate": 1.1102154228895906e-05, "loss": 2.5710777282714843, "step": 55160 }, { "epoch": 0.44533955425683913, "grad_norm": 1.0749000310897827, "learning_rate": 1.1100538759157696e-05, "loss": 2.784911346435547, "step": 55170 }, { "epoch": 0.44542027558260616, "grad_norm": 1.017249584197998, "learning_rate": 1.109892328941948e-05, "loss": 3.3470928192138674, "step": 55180 }, { "epoch": 0.44550099690837325, "grad_norm": 0.944348156452179, "learning_rate": 1.109730781968127e-05, "loss": 2.96374397277832, "step": 55190 }, { "epoch": 0.4455817182341403, "grad_norm": 1.0262649059295654, "learning_rate": 1.1095692349943054e-05, "loss": 2.674052619934082, "step": 55200 }, { "epoch": 0.4456624395599073, "grad_norm": 1.168481707572937, "learning_rate": 1.1094076880204843e-05, "loss": 3.142144775390625, "step": 55210 }, { "epoch": 0.4457431608856744, "grad_norm": 0.8438839912414551, "learning_rate": 1.1092461410466628e-05, "loss": 2.783349800109863, "step": 55220 }, { "epoch": 0.4458238822114414, "grad_norm": 0.9100348949432373, "learning_rate": 1.1090845940728417e-05, "loss": 2.869362449645996, "step": 55230 }, { "epoch": 0.4459046035372085, "grad_norm": 1.0494067668914795, "learning_rate": 1.1089230470990203e-05, "loss": 3.3909770965576174, "step": 55240 }, { "epoch": 0.44598532486297554, "grad_norm": 1.8360594511032104, "learning_rate": 1.108761500125199e-05, "loss": 3.0948278427124025, "step": 55250 }, { "epoch": 0.4460660461887426, "grad_norm": 0.7866554260253906, "learning_rate": 1.1085999531513777e-05, "loss": 2.8319637298583986, "step": 55260 }, { "epoch": 0.44614676751450966, "grad_norm": 0.7557215094566345, "learning_rate": 1.1084384061775564e-05, "loss": 2.6383554458618166, "step": 55270 }, { "epoch": 0.4462274888402767, "grad_norm": 0.8707180023193359, "learning_rate": 1.108276859203735e-05, "loss": 2.467960548400879, "step": 55280 }, { "epoch": 0.4463082101660438, "grad_norm": 0.6939404010772705, "learning_rate": 1.1081153122299138e-05, "loss": 3.1813518524169924, "step": 55290 }, { "epoch": 0.4463889314918108, "grad_norm": 0.9484858512878418, "learning_rate": 1.1079537652560924e-05, "loss": 2.5051700592041017, "step": 55300 }, { "epoch": 0.4464696528175779, "grad_norm": 0.9980307817459106, "learning_rate": 1.1077922182822712e-05, "loss": 3.144033432006836, "step": 55310 }, { "epoch": 0.4465503741433449, "grad_norm": 0.7964704036712646, "learning_rate": 1.1076306713084498e-05, "loss": 3.272591400146484, "step": 55320 }, { "epoch": 0.446631095469112, "grad_norm": 0.8192934989929199, "learning_rate": 1.1074691243346285e-05, "loss": 2.9291097640991213, "step": 55330 }, { "epoch": 0.44671181679487904, "grad_norm": 0.9653670191764832, "learning_rate": 1.1073075773608071e-05, "loss": 3.1828998565673827, "step": 55340 }, { "epoch": 0.4467925381206461, "grad_norm": 0.7874794602394104, "learning_rate": 1.1071460303869859e-05, "loss": 2.3882419586181642, "step": 55350 }, { "epoch": 0.44687325944641315, "grad_norm": 0.7417492866516113, "learning_rate": 1.1069844834131645e-05, "loss": 2.592716407775879, "step": 55360 }, { "epoch": 0.4469539807721802, "grad_norm": 1.1764135360717773, "learning_rate": 1.1068229364393433e-05, "loss": 3.493754577636719, "step": 55370 }, { "epoch": 0.44703470209794727, "grad_norm": 0.7494667172431946, "learning_rate": 1.1066613894655219e-05, "loss": 2.7448518753051756, "step": 55380 }, { "epoch": 0.4471154234237143, "grad_norm": 1.3275415897369385, "learning_rate": 1.1064998424917006e-05, "loss": 2.925777626037598, "step": 55390 }, { "epoch": 0.4471961447494814, "grad_norm": 0.9127994179725647, "learning_rate": 1.1063382955178794e-05, "loss": 2.799169731140137, "step": 55400 }, { "epoch": 0.4472768660752484, "grad_norm": 0.9883284568786621, "learning_rate": 1.106176748544058e-05, "loss": 2.9447843551635744, "step": 55410 }, { "epoch": 0.4473575874010155, "grad_norm": 0.8859753012657166, "learning_rate": 1.1060152015702368e-05, "loss": 3.173587417602539, "step": 55420 }, { "epoch": 0.44743830872678253, "grad_norm": 0.6110430359840393, "learning_rate": 1.1058536545964154e-05, "loss": 3.2261211395263674, "step": 55430 }, { "epoch": 0.44751903005254956, "grad_norm": 1.2007575035095215, "learning_rate": 1.1056921076225942e-05, "loss": 2.5524999618530275, "step": 55440 }, { "epoch": 0.44759975137831665, "grad_norm": 0.7638461589813232, "learning_rate": 1.1055305606487728e-05, "loss": 2.9873538970947267, "step": 55450 }, { "epoch": 0.4476804727040837, "grad_norm": 1.3212534189224243, "learning_rate": 1.1053690136749515e-05, "loss": 3.03619327545166, "step": 55460 }, { "epoch": 0.44776119402985076, "grad_norm": 0.8331343531608582, "learning_rate": 1.1052074667011301e-05, "loss": 2.714725112915039, "step": 55470 }, { "epoch": 0.4478419153556178, "grad_norm": 0.9567274451255798, "learning_rate": 1.1050459197273089e-05, "loss": 2.787758445739746, "step": 55480 }, { "epoch": 0.4479226366813849, "grad_norm": 1.0297858715057373, "learning_rate": 1.1048843727534875e-05, "loss": 2.823489761352539, "step": 55490 }, { "epoch": 0.4480033580071519, "grad_norm": 1.3106801509857178, "learning_rate": 1.1047228257796663e-05, "loss": 2.895111656188965, "step": 55500 }, { "epoch": 0.44808407933291894, "grad_norm": 0.9388282299041748, "learning_rate": 1.1045612788058449e-05, "loss": 2.8810014724731445, "step": 55510 }, { "epoch": 0.448164800658686, "grad_norm": 1.3113572597503662, "learning_rate": 1.1043997318320236e-05, "loss": 3.080189895629883, "step": 55520 }, { "epoch": 0.44824552198445305, "grad_norm": 0.6377514004707336, "learning_rate": 1.1042381848582022e-05, "loss": 2.695252227783203, "step": 55530 }, { "epoch": 0.44832624331022014, "grad_norm": 0.9321157336235046, "learning_rate": 1.104076637884381e-05, "loss": 3.110211944580078, "step": 55540 }, { "epoch": 0.44840696463598717, "grad_norm": 1.4247392416000366, "learning_rate": 1.1039150909105596e-05, "loss": 2.9890474319458007, "step": 55550 }, { "epoch": 0.44848768596175426, "grad_norm": 0.9386150240898132, "learning_rate": 1.1037535439367384e-05, "loss": 3.081279754638672, "step": 55560 }, { "epoch": 0.4485684072875213, "grad_norm": 1.5369282960891724, "learning_rate": 1.103591996962917e-05, "loss": 2.645867919921875, "step": 55570 }, { "epoch": 0.4486491286132883, "grad_norm": 0.776116132736206, "learning_rate": 1.1034304499890957e-05, "loss": 2.7949689865112304, "step": 55580 }, { "epoch": 0.4487298499390554, "grad_norm": 0.8777602910995483, "learning_rate": 1.1032689030152743e-05, "loss": 2.70424861907959, "step": 55590 }, { "epoch": 0.44881057126482243, "grad_norm": 0.8665933609008789, "learning_rate": 1.1031073560414531e-05, "loss": 2.7024133682250975, "step": 55600 }, { "epoch": 0.4488912925905895, "grad_norm": 1.136838674545288, "learning_rate": 1.1029458090676317e-05, "loss": 2.8495922088623047, "step": 55610 }, { "epoch": 0.44897201391635655, "grad_norm": 0.8770694732666016, "learning_rate": 1.1027842620938105e-05, "loss": 3.201333236694336, "step": 55620 }, { "epoch": 0.44905273524212364, "grad_norm": 0.7082319259643555, "learning_rate": 1.102622715119989e-05, "loss": 2.8824417114257814, "step": 55630 }, { "epoch": 0.44913345656789067, "grad_norm": 0.9027127027511597, "learning_rate": 1.1024611681461678e-05, "loss": 2.6284273147583006, "step": 55640 }, { "epoch": 0.44921417789365775, "grad_norm": 1.0428472757339478, "learning_rate": 1.1022996211723464e-05, "loss": 2.827664375305176, "step": 55650 }, { "epoch": 0.4492948992194248, "grad_norm": 0.9686706066131592, "learning_rate": 1.1021380741985252e-05, "loss": 2.598651123046875, "step": 55660 }, { "epoch": 0.4493756205451918, "grad_norm": 1.2733373641967773, "learning_rate": 1.1019765272247038e-05, "loss": 3.027500343322754, "step": 55670 }, { "epoch": 0.4494563418709589, "grad_norm": 0.8644294738769531, "learning_rate": 1.1018149802508826e-05, "loss": 2.869489860534668, "step": 55680 }, { "epoch": 0.44953706319672593, "grad_norm": 1.059316635131836, "learning_rate": 1.1016534332770612e-05, "loss": 2.8413076400756836, "step": 55690 }, { "epoch": 0.449617784522493, "grad_norm": 0.8005304336547852, "learning_rate": 1.10149188630324e-05, "loss": 3.0764556884765626, "step": 55700 }, { "epoch": 0.44969850584826004, "grad_norm": 1.0754576921463013, "learning_rate": 1.1013303393294186e-05, "loss": 2.833103561401367, "step": 55710 }, { "epoch": 0.44977922717402713, "grad_norm": 1.0473201274871826, "learning_rate": 1.1011687923555973e-05, "loss": 3.019788360595703, "step": 55720 }, { "epoch": 0.44985994849979416, "grad_norm": 1.0926858186721802, "learning_rate": 1.101007245381776e-05, "loss": 2.6118297576904297, "step": 55730 }, { "epoch": 0.4499406698255612, "grad_norm": 1.0600851774215698, "learning_rate": 1.1008456984079547e-05, "loss": 3.4885021209716798, "step": 55740 }, { "epoch": 0.4500213911513283, "grad_norm": 0.9125143885612488, "learning_rate": 1.1006841514341333e-05, "loss": 2.6310291290283203, "step": 55750 }, { "epoch": 0.4501021124770953, "grad_norm": 0.9986565709114075, "learning_rate": 1.100522604460312e-05, "loss": 3.2717052459716798, "step": 55760 }, { "epoch": 0.4501828338028624, "grad_norm": 1.0336549282073975, "learning_rate": 1.1003610574864907e-05, "loss": 3.116946792602539, "step": 55770 }, { "epoch": 0.4502635551286294, "grad_norm": 1.2041045427322388, "learning_rate": 1.1001995105126694e-05, "loss": 3.041609001159668, "step": 55780 }, { "epoch": 0.4503442764543965, "grad_norm": 1.0513486862182617, "learning_rate": 1.100037963538848e-05, "loss": 2.775295448303223, "step": 55790 }, { "epoch": 0.45042499778016354, "grad_norm": 0.7600595355033875, "learning_rate": 1.0998764165650268e-05, "loss": 2.488296890258789, "step": 55800 }, { "epoch": 0.45050571910593057, "grad_norm": 0.750775158405304, "learning_rate": 1.0997148695912054e-05, "loss": 3.19928035736084, "step": 55810 }, { "epoch": 0.45058644043169765, "grad_norm": 1.0336644649505615, "learning_rate": 1.0995533226173842e-05, "loss": 2.7573169708251952, "step": 55820 }, { "epoch": 0.4506671617574647, "grad_norm": 0.9295055866241455, "learning_rate": 1.0993917756435628e-05, "loss": 2.9429719924926756, "step": 55830 }, { "epoch": 0.45074788308323177, "grad_norm": 0.9507333040237427, "learning_rate": 1.0992302286697415e-05, "loss": 2.5383386611938477, "step": 55840 }, { "epoch": 0.4508286044089988, "grad_norm": 1.3727972507476807, "learning_rate": 1.0990686816959201e-05, "loss": 2.7487077713012695, "step": 55850 }, { "epoch": 0.4509093257347659, "grad_norm": 0.780683696269989, "learning_rate": 1.0989071347220989e-05, "loss": 2.7150251388549806, "step": 55860 }, { "epoch": 0.4509900470605329, "grad_norm": 1.767554759979248, "learning_rate": 1.0987455877482775e-05, "loss": 3.1045976638793946, "step": 55870 }, { "epoch": 0.4510707683863, "grad_norm": 0.5402848124504089, "learning_rate": 1.0985840407744563e-05, "loss": 2.786673164367676, "step": 55880 }, { "epoch": 0.45115148971206703, "grad_norm": 0.7720913887023926, "learning_rate": 1.0984224938006349e-05, "loss": 2.7821788787841797, "step": 55890 }, { "epoch": 0.45123221103783406, "grad_norm": 0.8256193995475769, "learning_rate": 1.0982609468268136e-05, "loss": 2.6054271697998046, "step": 55900 }, { "epoch": 0.45131293236360115, "grad_norm": 0.8062636256217957, "learning_rate": 1.0980993998529922e-05, "loss": 2.794063377380371, "step": 55910 }, { "epoch": 0.4513936536893682, "grad_norm": 0.4820409119129181, "learning_rate": 1.097937852879171e-05, "loss": 2.927368927001953, "step": 55920 }, { "epoch": 0.45147437501513527, "grad_norm": 0.8439788818359375, "learning_rate": 1.0977763059053496e-05, "loss": 2.781175994873047, "step": 55930 }, { "epoch": 0.4515550963409023, "grad_norm": 0.7622150182723999, "learning_rate": 1.0976147589315284e-05, "loss": 2.497509765625, "step": 55940 }, { "epoch": 0.4516358176666694, "grad_norm": 0.8144602179527283, "learning_rate": 1.097453211957707e-05, "loss": 2.774616813659668, "step": 55950 }, { "epoch": 0.4517165389924364, "grad_norm": 1.3323309421539307, "learning_rate": 1.097291664983886e-05, "loss": 2.7202390670776366, "step": 55960 }, { "epoch": 0.45179726031820344, "grad_norm": 1.2000361680984497, "learning_rate": 1.0971301180100644e-05, "loss": 3.0838241577148438, "step": 55970 }, { "epoch": 0.4518779816439705, "grad_norm": 0.6267455220222473, "learning_rate": 1.0969685710362433e-05, "loss": 2.6780628204345702, "step": 55980 }, { "epoch": 0.45195870296973756, "grad_norm": 0.8162587881088257, "learning_rate": 1.0968070240624217e-05, "loss": 2.9361282348632813, "step": 55990 }, { "epoch": 0.45203942429550464, "grad_norm": 0.7098073959350586, "learning_rate": 1.0966454770886007e-05, "loss": 2.511143112182617, "step": 56000 }, { "epoch": 0.4521201456212717, "grad_norm": 0.7857120633125305, "learning_rate": 1.0964839301147791e-05, "loss": 2.855290412902832, "step": 56010 }, { "epoch": 0.45220086694703876, "grad_norm": 1.347429871559143, "learning_rate": 1.096322383140958e-05, "loss": 2.7152027130126952, "step": 56020 }, { "epoch": 0.4522815882728058, "grad_norm": 1.2821743488311768, "learning_rate": 1.0961608361671365e-05, "loss": 2.5412904739379885, "step": 56030 }, { "epoch": 0.4523623095985728, "grad_norm": 1.0051045417785645, "learning_rate": 1.0959992891933154e-05, "loss": 2.649410057067871, "step": 56040 }, { "epoch": 0.4524430309243399, "grad_norm": 1.384917140007019, "learning_rate": 1.0958377422194938e-05, "loss": 2.7290115356445312, "step": 56050 }, { "epoch": 0.45252375225010694, "grad_norm": 1.0388929843902588, "learning_rate": 1.0956761952456728e-05, "loss": 2.4477022171020506, "step": 56060 }, { "epoch": 0.452604473575874, "grad_norm": 0.9793405532836914, "learning_rate": 1.0955146482718512e-05, "loss": 3.1412479400634767, "step": 56070 }, { "epoch": 0.45268519490164105, "grad_norm": 1.0702215433120728, "learning_rate": 1.0953531012980301e-05, "loss": 3.144443893432617, "step": 56080 }, { "epoch": 0.45276591622740814, "grad_norm": 0.8967335224151611, "learning_rate": 1.0951915543242086e-05, "loss": 2.7307544708251954, "step": 56090 }, { "epoch": 0.45284663755317517, "grad_norm": 0.8770086765289307, "learning_rate": 1.0950300073503875e-05, "loss": 2.4571063995361326, "step": 56100 }, { "epoch": 0.45292735887894225, "grad_norm": 1.066886305809021, "learning_rate": 1.094868460376566e-05, "loss": 3.097669792175293, "step": 56110 }, { "epoch": 0.4530080802047093, "grad_norm": 1.134613275527954, "learning_rate": 1.0947069134027449e-05, "loss": 2.7204490661621095, "step": 56120 }, { "epoch": 0.4530888015304763, "grad_norm": 1.2264440059661865, "learning_rate": 1.0945453664289235e-05, "loss": 2.9685348510742187, "step": 56130 }, { "epoch": 0.4531695228562434, "grad_norm": 0.9839764833450317, "learning_rate": 1.0943838194551022e-05, "loss": 2.6595773696899414, "step": 56140 }, { "epoch": 0.45325024418201043, "grad_norm": 0.5814798474311829, "learning_rate": 1.0942222724812808e-05, "loss": 2.7901586532592773, "step": 56150 }, { "epoch": 0.4533309655077775, "grad_norm": 0.6546168327331543, "learning_rate": 1.0940607255074596e-05, "loss": 2.4702829360961913, "step": 56160 }, { "epoch": 0.45341168683354455, "grad_norm": 1.0603117942810059, "learning_rate": 1.0938991785336382e-05, "loss": 2.5980676651000976, "step": 56170 }, { "epoch": 0.45349240815931163, "grad_norm": 0.7532505393028259, "learning_rate": 1.093737631559817e-05, "loss": 2.7926374435424806, "step": 56180 }, { "epoch": 0.45357312948507866, "grad_norm": 1.0490095615386963, "learning_rate": 1.0935760845859956e-05, "loss": 2.8830821990966795, "step": 56190 }, { "epoch": 0.4536538508108457, "grad_norm": 1.3720824718475342, "learning_rate": 1.0934145376121744e-05, "loss": 3.1847089767456054, "step": 56200 }, { "epoch": 0.4537345721366128, "grad_norm": 1.376301884651184, "learning_rate": 1.093252990638353e-05, "loss": 2.951934814453125, "step": 56210 }, { "epoch": 0.4538152934623798, "grad_norm": 0.8581254482269287, "learning_rate": 1.0930914436645317e-05, "loss": 2.685686492919922, "step": 56220 }, { "epoch": 0.4538960147881469, "grad_norm": 1.3424755334854126, "learning_rate": 1.0929298966907103e-05, "loss": 2.5128589630126954, "step": 56230 }, { "epoch": 0.4539767361139139, "grad_norm": 0.6169448494911194, "learning_rate": 1.0927683497168891e-05, "loss": 3.066762161254883, "step": 56240 }, { "epoch": 0.454057457439681, "grad_norm": 1.0885789394378662, "learning_rate": 1.0926068027430677e-05, "loss": 3.165152359008789, "step": 56250 }, { "epoch": 0.45413817876544804, "grad_norm": 0.9826846122741699, "learning_rate": 1.0924452557692465e-05, "loss": 2.697056770324707, "step": 56260 }, { "epoch": 0.45421890009121507, "grad_norm": 0.7362638711929321, "learning_rate": 1.092283708795425e-05, "loss": 2.6549205780029297, "step": 56270 }, { "epoch": 0.45429962141698216, "grad_norm": 0.7041714191436768, "learning_rate": 1.0921221618216038e-05, "loss": 2.6897695541381834, "step": 56280 }, { "epoch": 0.4543803427427492, "grad_norm": 1.458103060722351, "learning_rate": 1.0919606148477824e-05, "loss": 2.9571290969848634, "step": 56290 }, { "epoch": 0.4544610640685163, "grad_norm": 0.9519157409667969, "learning_rate": 1.0917990678739612e-05, "loss": 2.845767784118652, "step": 56300 }, { "epoch": 0.4545417853942833, "grad_norm": 0.7931832671165466, "learning_rate": 1.0916375209001398e-05, "loss": 2.9272430419921873, "step": 56310 }, { "epoch": 0.4546225067200504, "grad_norm": 0.6803493499755859, "learning_rate": 1.0914759739263186e-05, "loss": 3.307830047607422, "step": 56320 }, { "epoch": 0.4547032280458174, "grad_norm": 1.0029405355453491, "learning_rate": 1.0913144269524972e-05, "loss": 2.7637962341308593, "step": 56330 }, { "epoch": 0.4547839493715845, "grad_norm": 0.907984733581543, "learning_rate": 1.091152879978676e-05, "loss": 2.8756589889526367, "step": 56340 }, { "epoch": 0.45486467069735154, "grad_norm": 0.929162323474884, "learning_rate": 1.0909913330048545e-05, "loss": 3.2518253326416016, "step": 56350 }, { "epoch": 0.45494539202311857, "grad_norm": 1.3704370260238647, "learning_rate": 1.0908297860310333e-05, "loss": 3.0382198333740233, "step": 56360 }, { "epoch": 0.45502611334888565, "grad_norm": 0.6703969240188599, "learning_rate": 1.0906682390572119e-05, "loss": 2.5643348693847656, "step": 56370 }, { "epoch": 0.4551068346746527, "grad_norm": 0.7200900912284851, "learning_rate": 1.0905066920833907e-05, "loss": 3.122339057922363, "step": 56380 }, { "epoch": 0.45518755600041977, "grad_norm": 0.6780663132667542, "learning_rate": 1.0903451451095693e-05, "loss": 2.5787811279296875, "step": 56390 }, { "epoch": 0.4552682773261868, "grad_norm": 1.1319078207015991, "learning_rate": 1.090183598135748e-05, "loss": 3.215087127685547, "step": 56400 }, { "epoch": 0.4553489986519539, "grad_norm": 1.1562391519546509, "learning_rate": 1.0900220511619266e-05, "loss": 2.809630584716797, "step": 56410 }, { "epoch": 0.4554297199777209, "grad_norm": 1.204013466835022, "learning_rate": 1.0898605041881054e-05, "loss": 3.2706573486328123, "step": 56420 }, { "epoch": 0.45551044130348795, "grad_norm": 0.8256615996360779, "learning_rate": 1.089698957214284e-05, "loss": 2.508617973327637, "step": 56430 }, { "epoch": 0.45559116262925503, "grad_norm": 1.2868235111236572, "learning_rate": 1.0895374102404628e-05, "loss": 3.139661407470703, "step": 56440 }, { "epoch": 0.45567188395502206, "grad_norm": 0.8805974721908569, "learning_rate": 1.0893758632666414e-05, "loss": 2.4863422393798826, "step": 56450 }, { "epoch": 0.45575260528078915, "grad_norm": 1.68079674243927, "learning_rate": 1.0892143162928202e-05, "loss": 2.762527847290039, "step": 56460 }, { "epoch": 0.4558333266065562, "grad_norm": 0.7430317997932434, "learning_rate": 1.0890527693189988e-05, "loss": 3.1237457275390623, "step": 56470 }, { "epoch": 0.45591404793232326, "grad_norm": 1.025610089302063, "learning_rate": 1.0888912223451775e-05, "loss": 2.739090156555176, "step": 56480 }, { "epoch": 0.4559947692580903, "grad_norm": 0.6881012320518494, "learning_rate": 1.0887296753713561e-05, "loss": 2.9165124893188477, "step": 56490 }, { "epoch": 0.4560754905838573, "grad_norm": 0.7784506678581238, "learning_rate": 1.0885681283975349e-05, "loss": 2.8542974472045897, "step": 56500 }, { "epoch": 0.4561562119096244, "grad_norm": 0.5952300429344177, "learning_rate": 1.0884065814237135e-05, "loss": 2.7320106506347654, "step": 56510 }, { "epoch": 0.45623693323539144, "grad_norm": 1.0104036331176758, "learning_rate": 1.0882450344498923e-05, "loss": 2.818546485900879, "step": 56520 }, { "epoch": 0.4563176545611585, "grad_norm": 0.5237988233566284, "learning_rate": 1.0880834874760709e-05, "loss": 3.0829166412353515, "step": 56530 }, { "epoch": 0.45639837588692556, "grad_norm": 0.9340233206748962, "learning_rate": 1.0879219405022496e-05, "loss": 3.448672866821289, "step": 56540 }, { "epoch": 0.45647909721269264, "grad_norm": 1.1491436958312988, "learning_rate": 1.0877603935284282e-05, "loss": 2.842550277709961, "step": 56550 }, { "epoch": 0.45655981853845967, "grad_norm": 1.105814814567566, "learning_rate": 1.087598846554607e-05, "loss": 2.9120660781860352, "step": 56560 }, { "epoch": 0.45664053986422676, "grad_norm": 0.7979105114936829, "learning_rate": 1.0874372995807856e-05, "loss": 2.740081787109375, "step": 56570 }, { "epoch": 0.4567212611899938, "grad_norm": 0.5972238779067993, "learning_rate": 1.0872757526069644e-05, "loss": 2.7801780700683594, "step": 56580 }, { "epoch": 0.4568019825157608, "grad_norm": 1.113285779953003, "learning_rate": 1.087114205633143e-05, "loss": 2.523554039001465, "step": 56590 }, { "epoch": 0.4568827038415279, "grad_norm": 1.1240044832229614, "learning_rate": 1.0869526586593217e-05, "loss": 3.1100532531738283, "step": 56600 }, { "epoch": 0.45696342516729493, "grad_norm": 1.033201813697815, "learning_rate": 1.0867911116855003e-05, "loss": 3.152985954284668, "step": 56610 }, { "epoch": 0.457044146493062, "grad_norm": 1.0537512302398682, "learning_rate": 1.0866295647116791e-05, "loss": 2.4505022048950194, "step": 56620 }, { "epoch": 0.45712486781882905, "grad_norm": 1.2422027587890625, "learning_rate": 1.0864680177378577e-05, "loss": 2.8885614395141603, "step": 56630 }, { "epoch": 0.45720558914459614, "grad_norm": 0.8670509457588196, "learning_rate": 1.0863064707640365e-05, "loss": 2.747779655456543, "step": 56640 }, { "epoch": 0.45728631047036317, "grad_norm": 1.0638365745544434, "learning_rate": 1.086144923790215e-05, "loss": 3.1072086334228515, "step": 56650 }, { "epoch": 0.4573670317961302, "grad_norm": 0.5565492510795593, "learning_rate": 1.0859833768163938e-05, "loss": 2.679884910583496, "step": 56660 }, { "epoch": 0.4574477531218973, "grad_norm": 0.9144972562789917, "learning_rate": 1.0858218298425726e-05, "loss": 2.7259159088134766, "step": 56670 }, { "epoch": 0.4575284744476643, "grad_norm": 0.9814492464065552, "learning_rate": 1.0856602828687512e-05, "loss": 2.935091018676758, "step": 56680 }, { "epoch": 0.4576091957734314, "grad_norm": 0.9682049751281738, "learning_rate": 1.08549873589493e-05, "loss": 2.5321264266967773, "step": 56690 }, { "epoch": 0.45768991709919843, "grad_norm": 1.5085009336471558, "learning_rate": 1.0853371889211086e-05, "loss": 3.1053686141967773, "step": 56700 }, { "epoch": 0.4577706384249655, "grad_norm": 1.0504826307296753, "learning_rate": 1.0851756419472874e-05, "loss": 3.0843576431274413, "step": 56710 }, { "epoch": 0.45785135975073254, "grad_norm": 1.1274902820587158, "learning_rate": 1.085014094973466e-05, "loss": 2.695456886291504, "step": 56720 }, { "epoch": 0.4579320810764996, "grad_norm": 0.6419852375984192, "learning_rate": 1.0848525479996447e-05, "loss": 2.7145435333251955, "step": 56730 }, { "epoch": 0.45801280240226666, "grad_norm": 1.0486030578613281, "learning_rate": 1.0846910010258233e-05, "loss": 2.939640235900879, "step": 56740 }, { "epoch": 0.4580935237280337, "grad_norm": 0.740515410900116, "learning_rate": 1.0845294540520021e-05, "loss": 2.9000877380371093, "step": 56750 }, { "epoch": 0.4581742450538008, "grad_norm": 1.2406985759735107, "learning_rate": 1.0843679070781807e-05, "loss": 3.0416286468505858, "step": 56760 }, { "epoch": 0.4582549663795678, "grad_norm": 1.2861982583999634, "learning_rate": 1.0842063601043595e-05, "loss": 2.702129936218262, "step": 56770 }, { "epoch": 0.4583356877053349, "grad_norm": 0.5425528287887573, "learning_rate": 1.084044813130538e-05, "loss": 2.6349056243896483, "step": 56780 }, { "epoch": 0.4584164090311019, "grad_norm": 0.8303632736206055, "learning_rate": 1.0838832661567168e-05, "loss": 2.7019363403320313, "step": 56790 }, { "epoch": 0.45849713035686895, "grad_norm": 0.7711937427520752, "learning_rate": 1.0837217191828954e-05, "loss": 2.407855224609375, "step": 56800 }, { "epoch": 0.45857785168263604, "grad_norm": 0.7525988221168518, "learning_rate": 1.0835601722090742e-05, "loss": 2.8867877960205077, "step": 56810 }, { "epoch": 0.45865857300840307, "grad_norm": 1.0614981651306152, "learning_rate": 1.0833986252352528e-05, "loss": 3.1006185531616213, "step": 56820 }, { "epoch": 0.45873929433417016, "grad_norm": 0.6331793665885925, "learning_rate": 1.0832370782614317e-05, "loss": 2.78033561706543, "step": 56830 }, { "epoch": 0.4588200156599372, "grad_norm": 1.053485631942749, "learning_rate": 1.0830755312876102e-05, "loss": 2.8918483734130858, "step": 56840 }, { "epoch": 0.45890073698570427, "grad_norm": 0.9806953072547913, "learning_rate": 1.0829139843137891e-05, "loss": 2.6800159454345702, "step": 56850 }, { "epoch": 0.4589814583114713, "grad_norm": 0.7732498049736023, "learning_rate": 1.0827524373399675e-05, "loss": 2.7778127670288084, "step": 56860 }, { "epoch": 0.4590621796372384, "grad_norm": 0.9822466969490051, "learning_rate": 1.0825908903661465e-05, "loss": 2.6722726821899414, "step": 56870 }, { "epoch": 0.4591429009630054, "grad_norm": 0.6392837762832642, "learning_rate": 1.0824293433923249e-05, "loss": 2.637253761291504, "step": 56880 }, { "epoch": 0.45922362228877245, "grad_norm": 1.100789189338684, "learning_rate": 1.0822677964185038e-05, "loss": 2.5346939086914064, "step": 56890 }, { "epoch": 0.45930434361453953, "grad_norm": 1.0073249340057373, "learning_rate": 1.0821062494446823e-05, "loss": 3.074569320678711, "step": 56900 }, { "epoch": 0.45938506494030656, "grad_norm": 0.8257826566696167, "learning_rate": 1.0819447024708612e-05, "loss": 2.679119873046875, "step": 56910 }, { "epoch": 0.45946578626607365, "grad_norm": 0.8756200671195984, "learning_rate": 1.0817831554970396e-05, "loss": 2.702372932434082, "step": 56920 }, { "epoch": 0.4595465075918407, "grad_norm": 1.512846827507019, "learning_rate": 1.0816216085232186e-05, "loss": 2.5483936309814452, "step": 56930 }, { "epoch": 0.45962722891760777, "grad_norm": 2.5931365489959717, "learning_rate": 1.081460061549397e-05, "loss": 2.584652328491211, "step": 56940 }, { "epoch": 0.4597079502433748, "grad_norm": 0.9570513367652893, "learning_rate": 1.081298514575576e-05, "loss": 2.698634910583496, "step": 56950 }, { "epoch": 0.4597886715691418, "grad_norm": 1.1112693548202515, "learning_rate": 1.0811369676017544e-05, "loss": 2.9356267929077147, "step": 56960 }, { "epoch": 0.4598693928949089, "grad_norm": 1.1197912693023682, "learning_rate": 1.0809754206279333e-05, "loss": 3.102969741821289, "step": 56970 }, { "epoch": 0.45995011422067594, "grad_norm": 0.6492214202880859, "learning_rate": 1.0808138736541117e-05, "loss": 2.7720970153808593, "step": 56980 }, { "epoch": 0.46003083554644303, "grad_norm": 0.7011613845825195, "learning_rate": 1.0806523266802907e-05, "loss": 2.558117485046387, "step": 56990 }, { "epoch": 0.46011155687221006, "grad_norm": 1.1617424488067627, "learning_rate": 1.0804907797064693e-05, "loss": 2.9531377792358398, "step": 57000 }, { "epoch": 0.46019227819797714, "grad_norm": 1.294798493385315, "learning_rate": 1.080329232732648e-05, "loss": 2.413220977783203, "step": 57010 }, { "epoch": 0.4602729995237442, "grad_norm": 1.3349049091339111, "learning_rate": 1.0801676857588267e-05, "loss": 2.9623159408569335, "step": 57020 }, { "epoch": 0.4603537208495112, "grad_norm": 1.1531476974487305, "learning_rate": 1.0800061387850054e-05, "loss": 3.051341438293457, "step": 57030 }, { "epoch": 0.4604344421752783, "grad_norm": 0.695250391960144, "learning_rate": 1.079844591811184e-05, "loss": 2.912724494934082, "step": 57040 }, { "epoch": 0.4605151635010453, "grad_norm": 0.8670916557312012, "learning_rate": 1.0796830448373628e-05, "loss": 2.808028984069824, "step": 57050 }, { "epoch": 0.4605958848268124, "grad_norm": 0.9516810178756714, "learning_rate": 1.0795214978635414e-05, "loss": 2.879707145690918, "step": 57060 }, { "epoch": 0.46067660615257944, "grad_norm": 0.6975018382072449, "learning_rate": 1.0793599508897202e-05, "loss": 2.7425207138061523, "step": 57070 }, { "epoch": 0.4607573274783465, "grad_norm": 1.5177408456802368, "learning_rate": 1.0791984039158988e-05, "loss": 2.779012107849121, "step": 57080 }, { "epoch": 0.46083804880411355, "grad_norm": 1.4559952020645142, "learning_rate": 1.0790368569420775e-05, "loss": 2.5705780029296874, "step": 57090 }, { "epoch": 0.46091877012988064, "grad_norm": 1.003949522972107, "learning_rate": 1.0788753099682561e-05, "loss": 2.8607507705688477, "step": 57100 }, { "epoch": 0.46099949145564767, "grad_norm": 0.9744232296943665, "learning_rate": 1.0787137629944349e-05, "loss": 3.0861677169799804, "step": 57110 }, { "epoch": 0.4610802127814147, "grad_norm": 1.1911627054214478, "learning_rate": 1.0785522160206135e-05, "loss": 2.629423904418945, "step": 57120 }, { "epoch": 0.4611609341071818, "grad_norm": 0.9050529599189758, "learning_rate": 1.0783906690467923e-05, "loss": 2.8352432250976562, "step": 57130 }, { "epoch": 0.4612416554329488, "grad_norm": 1.3696236610412598, "learning_rate": 1.0782291220729709e-05, "loss": 2.5301055908203125, "step": 57140 }, { "epoch": 0.4613223767587159, "grad_norm": 0.84292072057724, "learning_rate": 1.0780675750991496e-05, "loss": 2.521263313293457, "step": 57150 }, { "epoch": 0.46140309808448293, "grad_norm": 0.6068512797355652, "learning_rate": 1.0779060281253282e-05, "loss": 2.810685920715332, "step": 57160 }, { "epoch": 0.46148381941025, "grad_norm": 0.7535144686698914, "learning_rate": 1.077744481151507e-05, "loss": 2.7852916717529297, "step": 57170 }, { "epoch": 0.46156454073601705, "grad_norm": 0.638729989528656, "learning_rate": 1.0775829341776856e-05, "loss": 2.939480781555176, "step": 57180 }, { "epoch": 0.4616452620617841, "grad_norm": 1.094649076461792, "learning_rate": 1.0774213872038644e-05, "loss": 2.5032718658447264, "step": 57190 }, { "epoch": 0.46172598338755116, "grad_norm": 0.6544663310050964, "learning_rate": 1.077259840230043e-05, "loss": 2.5931669235229493, "step": 57200 }, { "epoch": 0.4618067047133182, "grad_norm": 0.6346935033798218, "learning_rate": 1.0770982932562217e-05, "loss": 3.1386178970336913, "step": 57210 }, { "epoch": 0.4618874260390853, "grad_norm": 0.8653587698936462, "learning_rate": 1.0769367462824003e-05, "loss": 2.550734519958496, "step": 57220 }, { "epoch": 0.4619681473648523, "grad_norm": 1.5030497312545776, "learning_rate": 1.0767751993085791e-05, "loss": 2.7117233276367188, "step": 57230 }, { "epoch": 0.4620488686906194, "grad_norm": 0.9764187335968018, "learning_rate": 1.0766136523347577e-05, "loss": 3.1637115478515625, "step": 57240 }, { "epoch": 0.4621295900163864, "grad_norm": 1.3418971300125122, "learning_rate": 1.0764521053609365e-05, "loss": 2.425143241882324, "step": 57250 }, { "epoch": 0.46221031134215346, "grad_norm": 1.2476218938827515, "learning_rate": 1.0762905583871151e-05, "loss": 2.6003488540649413, "step": 57260 }, { "epoch": 0.46229103266792054, "grad_norm": 0.7493593096733093, "learning_rate": 1.0761290114132939e-05, "loss": 2.885301399230957, "step": 57270 }, { "epoch": 0.4623717539936876, "grad_norm": 1.0752239227294922, "learning_rate": 1.0759674644394725e-05, "loss": 2.784225845336914, "step": 57280 }, { "epoch": 0.46245247531945466, "grad_norm": 1.351839303970337, "learning_rate": 1.0758059174656512e-05, "loss": 2.707244873046875, "step": 57290 }, { "epoch": 0.4625331966452217, "grad_norm": 1.0284994840621948, "learning_rate": 1.0756443704918298e-05, "loss": 2.7911165237426756, "step": 57300 }, { "epoch": 0.4626139179709888, "grad_norm": 0.8286980390548706, "learning_rate": 1.0754828235180086e-05, "loss": 2.718556213378906, "step": 57310 }, { "epoch": 0.4626946392967558, "grad_norm": 1.301844596862793, "learning_rate": 1.0753212765441872e-05, "loss": 3.0495794296264647, "step": 57320 }, { "epoch": 0.4627753606225229, "grad_norm": 0.7987881898880005, "learning_rate": 1.075159729570366e-05, "loss": 3.016729736328125, "step": 57330 }, { "epoch": 0.4628560819482899, "grad_norm": 0.7875539660453796, "learning_rate": 1.0749981825965446e-05, "loss": 2.7532806396484375, "step": 57340 }, { "epoch": 0.46293680327405695, "grad_norm": 0.827133297920227, "learning_rate": 1.0748366356227233e-05, "loss": 3.3896007537841797, "step": 57350 }, { "epoch": 0.46301752459982404, "grad_norm": 1.1990680694580078, "learning_rate": 1.074675088648902e-05, "loss": 2.6915321350097656, "step": 57360 }, { "epoch": 0.46309824592559107, "grad_norm": 0.9225299954414368, "learning_rate": 1.0745135416750807e-05, "loss": 2.55075626373291, "step": 57370 }, { "epoch": 0.46317896725135815, "grad_norm": 0.6637571454048157, "learning_rate": 1.0743519947012593e-05, "loss": 2.8470678329467773, "step": 57380 }, { "epoch": 0.4632596885771252, "grad_norm": 0.6963948011398315, "learning_rate": 1.074190447727438e-05, "loss": 2.647889518737793, "step": 57390 }, { "epoch": 0.46334040990289227, "grad_norm": 0.9294414520263672, "learning_rate": 1.0740289007536167e-05, "loss": 3.1869834899902343, "step": 57400 }, { "epoch": 0.4634211312286593, "grad_norm": 0.7783032655715942, "learning_rate": 1.0738673537797954e-05, "loss": 3.0922626495361327, "step": 57410 }, { "epoch": 0.46350185255442633, "grad_norm": 1.354236125946045, "learning_rate": 1.073705806805974e-05, "loss": 3.1705720901489256, "step": 57420 }, { "epoch": 0.4635825738801934, "grad_norm": 0.7573928236961365, "learning_rate": 1.0735442598321528e-05, "loss": 2.412888526916504, "step": 57430 }, { "epoch": 0.46366329520596045, "grad_norm": 0.8719596862792969, "learning_rate": 1.0733827128583314e-05, "loss": 2.652400016784668, "step": 57440 }, { "epoch": 0.46374401653172753, "grad_norm": 0.7515410780906677, "learning_rate": 1.0732211658845102e-05, "loss": 2.939257049560547, "step": 57450 }, { "epoch": 0.46382473785749456, "grad_norm": 0.7370734214782715, "learning_rate": 1.0730596189106888e-05, "loss": 2.7766855239868162, "step": 57460 }, { "epoch": 0.46390545918326165, "grad_norm": 0.8525745868682861, "learning_rate": 1.0728980719368675e-05, "loss": 2.5809940338134765, "step": 57470 }, { "epoch": 0.4639861805090287, "grad_norm": 1.1542493104934692, "learning_rate": 1.0727365249630461e-05, "loss": 2.7030002593994142, "step": 57480 }, { "epoch": 0.4640669018347957, "grad_norm": 0.571559488773346, "learning_rate": 1.072574977989225e-05, "loss": 2.6605861663818358, "step": 57490 }, { "epoch": 0.4641476231605628, "grad_norm": 0.707772970199585, "learning_rate": 1.0724134310154035e-05, "loss": 3.533296585083008, "step": 57500 }, { "epoch": 0.4642283444863298, "grad_norm": 1.1593759059906006, "learning_rate": 1.0722518840415823e-05, "loss": 2.8302087783813477, "step": 57510 }, { "epoch": 0.4643090658120969, "grad_norm": 0.7572910785675049, "learning_rate": 1.0720903370677609e-05, "loss": 2.854079246520996, "step": 57520 }, { "epoch": 0.46438978713786394, "grad_norm": 0.7774747610092163, "learning_rate": 1.0719287900939397e-05, "loss": 2.560035514831543, "step": 57530 }, { "epoch": 0.464470508463631, "grad_norm": 1.4476875066757202, "learning_rate": 1.0717672431201183e-05, "loss": 2.8895566940307615, "step": 57540 }, { "epoch": 0.46455122978939806, "grad_norm": 1.0401602983474731, "learning_rate": 1.071605696146297e-05, "loss": 3.023537254333496, "step": 57550 }, { "epoch": 0.46463195111516514, "grad_norm": 0.7957249879837036, "learning_rate": 1.0714441491724756e-05, "loss": 2.5547027587890625, "step": 57560 }, { "epoch": 0.4647126724409322, "grad_norm": 0.9453625082969666, "learning_rate": 1.0712826021986544e-05, "loss": 2.3744888305664062, "step": 57570 }, { "epoch": 0.4647933937666992, "grad_norm": 1.0421350002288818, "learning_rate": 1.071121055224833e-05, "loss": 2.6953397750854493, "step": 57580 }, { "epoch": 0.4648741150924663, "grad_norm": 0.715111255645752, "learning_rate": 1.0709595082510118e-05, "loss": 2.7581388473510744, "step": 57590 }, { "epoch": 0.4649548364182333, "grad_norm": 0.7158635258674622, "learning_rate": 1.0707979612771904e-05, "loss": 3.1270013809204102, "step": 57600 }, { "epoch": 0.4650355577440004, "grad_norm": 0.8841179013252258, "learning_rate": 1.0706364143033691e-05, "loss": 2.4074626922607423, "step": 57610 }, { "epoch": 0.46511627906976744, "grad_norm": 0.6412128806114197, "learning_rate": 1.0704748673295477e-05, "loss": 3.1005468368530273, "step": 57620 }, { "epoch": 0.4651970003955345, "grad_norm": 0.9544959664344788, "learning_rate": 1.0703133203557265e-05, "loss": 2.395547103881836, "step": 57630 }, { "epoch": 0.46527772172130155, "grad_norm": 0.6724302768707275, "learning_rate": 1.0701517733819051e-05, "loss": 3.045485496520996, "step": 57640 }, { "epoch": 0.4653584430470686, "grad_norm": 1.5430021286010742, "learning_rate": 1.0699902264080839e-05, "loss": 3.03366756439209, "step": 57650 }, { "epoch": 0.46543916437283567, "grad_norm": 0.6540620923042297, "learning_rate": 1.0698286794342625e-05, "loss": 2.757236671447754, "step": 57660 }, { "epoch": 0.4655198856986027, "grad_norm": 0.6358168125152588, "learning_rate": 1.0696671324604412e-05, "loss": 2.814748191833496, "step": 57670 }, { "epoch": 0.4656006070243698, "grad_norm": 0.8529707789421082, "learning_rate": 1.0695055854866198e-05, "loss": 2.757254791259766, "step": 57680 }, { "epoch": 0.4656813283501368, "grad_norm": 0.8164579272270203, "learning_rate": 1.0693440385127986e-05, "loss": 3.0514869689941406, "step": 57690 }, { "epoch": 0.4657620496759039, "grad_norm": 0.7430707812309265, "learning_rate": 1.0691824915389772e-05, "loss": 2.601288414001465, "step": 57700 }, { "epoch": 0.46584277100167093, "grad_norm": 1.3018425703048706, "learning_rate": 1.069020944565156e-05, "loss": 3.181534767150879, "step": 57710 }, { "epoch": 0.46592349232743796, "grad_norm": 0.9666791558265686, "learning_rate": 1.0688593975913346e-05, "loss": 2.725058364868164, "step": 57720 }, { "epoch": 0.46600421365320505, "grad_norm": 1.046785831451416, "learning_rate": 1.0686978506175133e-05, "loss": 2.591478729248047, "step": 57730 }, { "epoch": 0.4660849349789721, "grad_norm": 0.49555230140686035, "learning_rate": 1.068536303643692e-05, "loss": 2.8108144760131837, "step": 57740 }, { "epoch": 0.46616565630473916, "grad_norm": 0.5616818070411682, "learning_rate": 1.0683747566698707e-05, "loss": 2.9312009811401367, "step": 57750 }, { "epoch": 0.4662463776305062, "grad_norm": 1.3956505060195923, "learning_rate": 1.0682132096960493e-05, "loss": 2.802711296081543, "step": 57760 }, { "epoch": 0.4663270989562733, "grad_norm": 1.3478929996490479, "learning_rate": 1.068051662722228e-05, "loss": 3.127236557006836, "step": 57770 }, { "epoch": 0.4664078202820403, "grad_norm": 0.7100900411605835, "learning_rate": 1.0678901157484067e-05, "loss": 2.7969804763793946, "step": 57780 }, { "epoch": 0.4664885416078074, "grad_norm": 0.9821594953536987, "learning_rate": 1.0677285687745855e-05, "loss": 2.720338249206543, "step": 57790 }, { "epoch": 0.4665692629335744, "grad_norm": 0.8845499157905579, "learning_rate": 1.067567021800764e-05, "loss": 3.0200448989868165, "step": 57800 }, { "epoch": 0.46664998425934145, "grad_norm": 0.6012082695960999, "learning_rate": 1.0674054748269428e-05, "loss": 2.8341373443603515, "step": 57810 }, { "epoch": 0.46673070558510854, "grad_norm": 0.9342494010925293, "learning_rate": 1.0672439278531214e-05, "loss": 2.9843982696533202, "step": 57820 }, { "epoch": 0.46681142691087557, "grad_norm": 0.8597422242164612, "learning_rate": 1.0670823808793002e-05, "loss": 2.6879751205444338, "step": 57830 }, { "epoch": 0.46689214823664266, "grad_norm": 0.867921769618988, "learning_rate": 1.0669208339054788e-05, "loss": 3.105626678466797, "step": 57840 }, { "epoch": 0.4669728695624097, "grad_norm": 0.8971653580665588, "learning_rate": 1.0667592869316576e-05, "loss": 2.4769588470458985, "step": 57850 }, { "epoch": 0.4670535908881768, "grad_norm": 0.7658554315567017, "learning_rate": 1.0665977399578362e-05, "loss": 2.8835886001586912, "step": 57860 }, { "epoch": 0.4671343122139438, "grad_norm": 0.6016737222671509, "learning_rate": 1.0664361929840151e-05, "loss": 2.9367307662963866, "step": 57870 }, { "epoch": 0.46721503353971083, "grad_norm": 0.817319929599762, "learning_rate": 1.0662746460101935e-05, "loss": 2.8035301208496093, "step": 57880 }, { "epoch": 0.4672957548654779, "grad_norm": 1.0303869247436523, "learning_rate": 1.0661130990363725e-05, "loss": 3.3600486755371093, "step": 57890 }, { "epoch": 0.46737647619124495, "grad_norm": 1.2503529787063599, "learning_rate": 1.0659515520625509e-05, "loss": 2.7419395446777344, "step": 57900 }, { "epoch": 0.46745719751701204, "grad_norm": 0.8396415114402771, "learning_rate": 1.0657900050887298e-05, "loss": 2.861532783508301, "step": 57910 }, { "epoch": 0.46753791884277907, "grad_norm": 0.6857513189315796, "learning_rate": 1.0656284581149086e-05, "loss": 3.2051692962646485, "step": 57920 }, { "epoch": 0.46761864016854615, "grad_norm": 0.9053630232810974, "learning_rate": 1.0654669111410872e-05, "loss": 3.7917659759521483, "step": 57930 }, { "epoch": 0.4676993614943132, "grad_norm": 1.0430487394332886, "learning_rate": 1.065305364167266e-05, "loss": 3.6220603942871095, "step": 57940 }, { "epoch": 0.4677800828200802, "grad_norm": 0.6399372220039368, "learning_rate": 1.0651438171934446e-05, "loss": 2.421398162841797, "step": 57950 }, { "epoch": 0.4678608041458473, "grad_norm": 1.1344552040100098, "learning_rate": 1.0649822702196233e-05, "loss": 2.535122108459473, "step": 57960 }, { "epoch": 0.46794152547161433, "grad_norm": 0.6673045754432678, "learning_rate": 1.064820723245802e-05, "loss": 2.5459772109985352, "step": 57970 }, { "epoch": 0.4680222467973814, "grad_norm": 0.8101799488067627, "learning_rate": 1.0646591762719807e-05, "loss": 2.8937446594238283, "step": 57980 }, { "epoch": 0.46810296812314844, "grad_norm": 1.0032509565353394, "learning_rate": 1.0644976292981593e-05, "loss": 2.8072790145874023, "step": 57990 }, { "epoch": 0.46818368944891553, "grad_norm": 0.8451926708221436, "learning_rate": 1.064336082324338e-05, "loss": 2.799407958984375, "step": 58000 }, { "epoch": 0.46826441077468256, "grad_norm": 0.9705399870872498, "learning_rate": 1.0641745353505167e-05, "loss": 3.3661922454833983, "step": 58010 }, { "epoch": 0.4683451321004496, "grad_norm": 1.0790616273880005, "learning_rate": 1.0640129883766955e-05, "loss": 2.5109636306762697, "step": 58020 }, { "epoch": 0.4684258534262167, "grad_norm": 1.045318841934204, "learning_rate": 1.063851441402874e-05, "loss": 2.8244356155395507, "step": 58030 }, { "epoch": 0.4685065747519837, "grad_norm": 1.8960541486740112, "learning_rate": 1.0636898944290528e-05, "loss": 3.6002593994140626, "step": 58040 }, { "epoch": 0.4685872960777508, "grad_norm": 1.3142634630203247, "learning_rate": 1.0635283474552314e-05, "loss": 3.023516082763672, "step": 58050 }, { "epoch": 0.4686680174035178, "grad_norm": 0.6271942257881165, "learning_rate": 1.0633668004814102e-05, "loss": 2.686606788635254, "step": 58060 }, { "epoch": 0.4687487387292849, "grad_norm": 0.7636549472808838, "learning_rate": 1.0632052535075888e-05, "loss": 3.0051446914672852, "step": 58070 }, { "epoch": 0.46882946005505194, "grad_norm": 1.3436671495437622, "learning_rate": 1.0630437065337676e-05, "loss": 2.843821716308594, "step": 58080 }, { "epoch": 0.468910181380819, "grad_norm": 0.9684677720069885, "learning_rate": 1.0628821595599462e-05, "loss": 3.259787750244141, "step": 58090 }, { "epoch": 0.46899090270658605, "grad_norm": 1.3386951684951782, "learning_rate": 1.062720612586125e-05, "loss": 2.463395118713379, "step": 58100 }, { "epoch": 0.4690716240323531, "grad_norm": 1.1099672317504883, "learning_rate": 1.0625590656123035e-05, "loss": 3.1502193450927733, "step": 58110 }, { "epoch": 0.46915234535812017, "grad_norm": 0.6727647185325623, "learning_rate": 1.0623975186384823e-05, "loss": 2.833863067626953, "step": 58120 }, { "epoch": 0.4692330666838872, "grad_norm": 1.1945191621780396, "learning_rate": 1.0622359716646609e-05, "loss": 2.7422651290893554, "step": 58130 }, { "epoch": 0.4693137880096543, "grad_norm": 1.4187930822372437, "learning_rate": 1.0620744246908397e-05, "loss": 3.3599674224853517, "step": 58140 }, { "epoch": 0.4693945093354213, "grad_norm": 1.0942635536193848, "learning_rate": 1.0619128777170183e-05, "loss": 2.7474123001098634, "step": 58150 }, { "epoch": 0.4694752306611884, "grad_norm": 1.618639349937439, "learning_rate": 1.061751330743197e-05, "loss": 2.8845996856689453, "step": 58160 }, { "epoch": 0.46955595198695543, "grad_norm": 0.7343406081199646, "learning_rate": 1.0615897837693756e-05, "loss": 2.760572052001953, "step": 58170 }, { "epoch": 0.46963667331272246, "grad_norm": 0.6422488689422607, "learning_rate": 1.0614282367955544e-05, "loss": 3.3932109832763673, "step": 58180 }, { "epoch": 0.46971739463848955, "grad_norm": 1.104642629623413, "learning_rate": 1.061266689821733e-05, "loss": 2.9365238189697265, "step": 58190 }, { "epoch": 0.4697981159642566, "grad_norm": 1.0717300176620483, "learning_rate": 1.0611051428479118e-05, "loss": 3.002202606201172, "step": 58200 }, { "epoch": 0.46987883729002367, "grad_norm": 0.797008216381073, "learning_rate": 1.0609435958740904e-05, "loss": 2.878924560546875, "step": 58210 }, { "epoch": 0.4699595586157907, "grad_norm": 1.028354287147522, "learning_rate": 1.0607820489002691e-05, "loss": 2.443450927734375, "step": 58220 }, { "epoch": 0.4700402799415578, "grad_norm": 0.9934611320495605, "learning_rate": 1.0606205019264477e-05, "loss": 2.7803306579589844, "step": 58230 }, { "epoch": 0.4701210012673248, "grad_norm": 1.0392327308654785, "learning_rate": 1.0604589549526265e-05, "loss": 2.860991859436035, "step": 58240 }, { "epoch": 0.47020172259309184, "grad_norm": 0.9105669856071472, "learning_rate": 1.0602974079788051e-05, "loss": 2.6398611068725586, "step": 58250 }, { "epoch": 0.47028244391885893, "grad_norm": 0.8491590619087219, "learning_rate": 1.0601358610049839e-05, "loss": 2.6733612060546874, "step": 58260 }, { "epoch": 0.47036316524462596, "grad_norm": 0.947228193283081, "learning_rate": 1.0599743140311625e-05, "loss": 3.0102495193481444, "step": 58270 }, { "epoch": 0.47044388657039304, "grad_norm": 0.6640563607215881, "learning_rate": 1.0598127670573413e-05, "loss": 2.4606550216674803, "step": 58280 }, { "epoch": 0.4705246078961601, "grad_norm": 1.3381482362747192, "learning_rate": 1.0596512200835199e-05, "loss": 2.8413305282592773, "step": 58290 }, { "epoch": 0.47060532922192716, "grad_norm": 0.8631516695022583, "learning_rate": 1.0594896731096986e-05, "loss": 2.8161905288696287, "step": 58300 }, { "epoch": 0.4706860505476942, "grad_norm": 1.040183186531067, "learning_rate": 1.0593281261358772e-05, "loss": 3.129939651489258, "step": 58310 }, { "epoch": 0.4707667718734613, "grad_norm": 1.6358877420425415, "learning_rate": 1.059166579162056e-05, "loss": 2.7682674407958983, "step": 58320 }, { "epoch": 0.4708474931992283, "grad_norm": 0.7919921875, "learning_rate": 1.0590050321882346e-05, "loss": 2.9199039459228517, "step": 58330 }, { "epoch": 0.47092821452499534, "grad_norm": 1.4965442419052124, "learning_rate": 1.0588434852144134e-05, "loss": 2.534988021850586, "step": 58340 }, { "epoch": 0.4710089358507624, "grad_norm": 0.9000974297523499, "learning_rate": 1.058681938240592e-05, "loss": 3.1567157745361327, "step": 58350 }, { "epoch": 0.47108965717652945, "grad_norm": 0.8311294317245483, "learning_rate": 1.0585203912667707e-05, "loss": 2.8141353607177733, "step": 58360 }, { "epoch": 0.47117037850229654, "grad_norm": 1.2444871664047241, "learning_rate": 1.0583588442929493e-05, "loss": 2.4567914962768556, "step": 58370 }, { "epoch": 0.47125109982806357, "grad_norm": 1.110603928565979, "learning_rate": 1.0581972973191281e-05, "loss": 2.9967145919799805, "step": 58380 }, { "epoch": 0.47133182115383065, "grad_norm": 0.7814074754714966, "learning_rate": 1.0580357503453067e-05, "loss": 2.788631820678711, "step": 58390 }, { "epoch": 0.4714125424795977, "grad_norm": 0.9080867171287537, "learning_rate": 1.0578742033714855e-05, "loss": 2.721775436401367, "step": 58400 }, { "epoch": 0.4714932638053647, "grad_norm": 1.0620577335357666, "learning_rate": 1.057712656397664e-05, "loss": 2.755481719970703, "step": 58410 }, { "epoch": 0.4715739851311318, "grad_norm": 0.7138882279396057, "learning_rate": 1.0575511094238428e-05, "loss": 2.4404979705810548, "step": 58420 }, { "epoch": 0.47165470645689883, "grad_norm": 2.4397122859954834, "learning_rate": 1.0573895624500214e-05, "loss": 3.3513465881347657, "step": 58430 }, { "epoch": 0.4717354277826659, "grad_norm": 1.0478413105010986, "learning_rate": 1.0572280154762002e-05, "loss": 2.9178449630737306, "step": 58440 }, { "epoch": 0.47181614910843295, "grad_norm": 0.6916263103485107, "learning_rate": 1.0570664685023788e-05, "loss": 3.756277084350586, "step": 58450 }, { "epoch": 0.47189687043420003, "grad_norm": 1.28110671043396, "learning_rate": 1.0569049215285576e-05, "loss": 3.1317304611206054, "step": 58460 }, { "epoch": 0.47197759175996706, "grad_norm": 0.7425562739372253, "learning_rate": 1.0567433745547362e-05, "loss": 2.53326530456543, "step": 58470 }, { "epoch": 0.4720583130857341, "grad_norm": 1.3036479949951172, "learning_rate": 1.056581827580915e-05, "loss": 2.460016632080078, "step": 58480 }, { "epoch": 0.4721390344115012, "grad_norm": 2.5065724849700928, "learning_rate": 1.0564202806070935e-05, "loss": 2.750913619995117, "step": 58490 }, { "epoch": 0.4722197557372682, "grad_norm": 1.3469979763031006, "learning_rate": 1.0562587336332723e-05, "loss": 2.5032985687255858, "step": 58500 }, { "epoch": 0.4723004770630353, "grad_norm": 0.9029566645622253, "learning_rate": 1.0560971866594509e-05, "loss": 2.549083137512207, "step": 58510 }, { "epoch": 0.4723811983888023, "grad_norm": 0.9175779223442078, "learning_rate": 1.0559356396856297e-05, "loss": 2.897140121459961, "step": 58520 }, { "epoch": 0.4724619197145694, "grad_norm": 1.1707836389541626, "learning_rate": 1.0557740927118083e-05, "loss": 2.8671438217163088, "step": 58530 }, { "epoch": 0.47254264104033644, "grad_norm": 1.319427728652954, "learning_rate": 1.055612545737987e-05, "loss": 2.8205282211303713, "step": 58540 }, { "epoch": 0.4726233623661035, "grad_norm": 0.7220478057861328, "learning_rate": 1.0554509987641657e-05, "loss": 2.5614938735961914, "step": 58550 }, { "epoch": 0.47270408369187056, "grad_norm": 0.9778963923454285, "learning_rate": 1.0552894517903444e-05, "loss": 2.5719444274902346, "step": 58560 }, { "epoch": 0.4727848050176376, "grad_norm": 1.2866171598434448, "learning_rate": 1.055127904816523e-05, "loss": 2.6009441375732423, "step": 58570 }, { "epoch": 0.4728655263434047, "grad_norm": 1.170823097229004, "learning_rate": 1.0549663578427018e-05, "loss": 2.6912145614624023, "step": 58580 }, { "epoch": 0.4729462476691717, "grad_norm": 0.9828187227249146, "learning_rate": 1.0548048108688804e-05, "loss": 3.2268272399902345, "step": 58590 }, { "epoch": 0.4730269689949388, "grad_norm": 0.8000579476356506, "learning_rate": 1.0546432638950592e-05, "loss": 3.3098377227783202, "step": 58600 }, { "epoch": 0.4731076903207058, "grad_norm": 0.6888050436973572, "learning_rate": 1.0544817169212378e-05, "loss": 3.3084426879882813, "step": 58610 }, { "epoch": 0.4731884116464729, "grad_norm": 1.2777562141418457, "learning_rate": 1.0543201699474165e-05, "loss": 2.941416549682617, "step": 58620 }, { "epoch": 0.47326913297223994, "grad_norm": 0.996080219745636, "learning_rate": 1.0541586229735951e-05, "loss": 2.9975719451904297, "step": 58630 }, { "epoch": 0.47334985429800697, "grad_norm": 1.1462187767028809, "learning_rate": 1.0539970759997739e-05, "loss": 2.931951713562012, "step": 58640 }, { "epoch": 0.47343057562377405, "grad_norm": 0.8077395558357239, "learning_rate": 1.0538355290259525e-05, "loss": 2.7713829040527345, "step": 58650 }, { "epoch": 0.4735112969495411, "grad_norm": 0.9597742557525635, "learning_rate": 1.0536739820521313e-05, "loss": 2.400982475280762, "step": 58660 }, { "epoch": 0.47359201827530817, "grad_norm": 1.180037498474121, "learning_rate": 1.0535124350783099e-05, "loss": 3.1305622100830077, "step": 58670 }, { "epoch": 0.4736727396010752, "grad_norm": 1.0796476602554321, "learning_rate": 1.0533508881044886e-05, "loss": 2.9859695434570312, "step": 58680 }, { "epoch": 0.4737534609268423, "grad_norm": 0.8988416790962219, "learning_rate": 1.0531893411306672e-05, "loss": 3.0065067291259764, "step": 58690 }, { "epoch": 0.4738341822526093, "grad_norm": 1.1440362930297852, "learning_rate": 1.053027794156846e-05, "loss": 2.520232582092285, "step": 58700 }, { "epoch": 0.47391490357837635, "grad_norm": 0.7494267821311951, "learning_rate": 1.0528662471830246e-05, "loss": 2.481911849975586, "step": 58710 }, { "epoch": 0.47399562490414343, "grad_norm": 0.5778793096542358, "learning_rate": 1.0527047002092034e-05, "loss": 2.6726959228515623, "step": 58720 }, { "epoch": 0.47407634622991046, "grad_norm": 0.5580706000328064, "learning_rate": 1.052543153235382e-05, "loss": 2.8024667739868163, "step": 58730 }, { "epoch": 0.47415706755567755, "grad_norm": 0.7673879861831665, "learning_rate": 1.0523816062615609e-05, "loss": 3.412862014770508, "step": 58740 }, { "epoch": 0.4742377888814446, "grad_norm": 1.2728556394577026, "learning_rate": 1.0522200592877393e-05, "loss": 2.713390922546387, "step": 58750 }, { "epoch": 0.47431851020721166, "grad_norm": 1.1253376007080078, "learning_rate": 1.0520585123139183e-05, "loss": 3.11071834564209, "step": 58760 }, { "epoch": 0.4743992315329787, "grad_norm": 1.8145997524261475, "learning_rate": 1.0518969653400967e-05, "loss": 3.003896141052246, "step": 58770 }, { "epoch": 0.4744799528587458, "grad_norm": 0.6743913888931274, "learning_rate": 1.0517354183662756e-05, "loss": 3.2272533416748046, "step": 58780 }, { "epoch": 0.4745606741845128, "grad_norm": 1.057486891746521, "learning_rate": 1.051573871392454e-05, "loss": 2.807179069519043, "step": 58790 }, { "epoch": 0.47464139551027984, "grad_norm": 1.0455553531646729, "learning_rate": 1.051412324418633e-05, "loss": 2.809644889831543, "step": 58800 }, { "epoch": 0.4747221168360469, "grad_norm": 1.0435963869094849, "learning_rate": 1.0512507774448114e-05, "loss": 3.089949035644531, "step": 58810 }, { "epoch": 0.47480283816181396, "grad_norm": 1.0879524946212769, "learning_rate": 1.0510892304709904e-05, "loss": 2.5557409286499024, "step": 58820 }, { "epoch": 0.47488355948758104, "grad_norm": 1.2558833360671997, "learning_rate": 1.0509276834971688e-05, "loss": 3.04793643951416, "step": 58830 }, { "epoch": 0.47496428081334807, "grad_norm": 0.9080864787101746, "learning_rate": 1.0507661365233478e-05, "loss": 2.8789289474487303, "step": 58840 }, { "epoch": 0.47504500213911516, "grad_norm": 0.6101618409156799, "learning_rate": 1.0506045895495262e-05, "loss": 2.697308349609375, "step": 58850 }, { "epoch": 0.4751257234648822, "grad_norm": 0.6528364419937134, "learning_rate": 1.0504430425757051e-05, "loss": 2.4724668502807616, "step": 58860 }, { "epoch": 0.4752064447906492, "grad_norm": 0.9028708338737488, "learning_rate": 1.0502814956018836e-05, "loss": 2.6988569259643556, "step": 58870 }, { "epoch": 0.4752871661164163, "grad_norm": 0.8765351176261902, "learning_rate": 1.0501199486280625e-05, "loss": 2.8820924758911133, "step": 58880 }, { "epoch": 0.47536788744218333, "grad_norm": 1.1718276739120483, "learning_rate": 1.0499584016542411e-05, "loss": 3.337712860107422, "step": 58890 }, { "epoch": 0.4754486087679504, "grad_norm": 2.1814398765563965, "learning_rate": 1.0497968546804199e-05, "loss": 3.213957977294922, "step": 58900 }, { "epoch": 0.47552933009371745, "grad_norm": 0.6921213269233704, "learning_rate": 1.0496353077065985e-05, "loss": 2.888191795349121, "step": 58910 }, { "epoch": 0.47561005141948454, "grad_norm": 2.3988678455352783, "learning_rate": 1.0494737607327772e-05, "loss": 2.736505317687988, "step": 58920 }, { "epoch": 0.47569077274525157, "grad_norm": 0.8912931084632874, "learning_rate": 1.0493122137589558e-05, "loss": 2.8053237915039064, "step": 58930 }, { "epoch": 0.4757714940710186, "grad_norm": 1.900600552558899, "learning_rate": 1.0491506667851346e-05, "loss": 2.8859531402587892, "step": 58940 }, { "epoch": 0.4758522153967857, "grad_norm": 0.677666962146759, "learning_rate": 1.0489891198113132e-05, "loss": 2.749275779724121, "step": 58950 }, { "epoch": 0.4759329367225527, "grad_norm": 1.2943257093429565, "learning_rate": 1.048827572837492e-05, "loss": 2.587194061279297, "step": 58960 }, { "epoch": 0.4760136580483198, "grad_norm": 1.3031805753707886, "learning_rate": 1.0486660258636706e-05, "loss": 2.5276628494262696, "step": 58970 }, { "epoch": 0.47609437937408683, "grad_norm": 1.0873010158538818, "learning_rate": 1.0485044788898493e-05, "loss": 2.548038673400879, "step": 58980 }, { "epoch": 0.4761751006998539, "grad_norm": 0.8878386616706848, "learning_rate": 1.048342931916028e-05, "loss": 2.690560150146484, "step": 58990 }, { "epoch": 0.47625582202562095, "grad_norm": 1.068799376487732, "learning_rate": 1.0481813849422067e-05, "loss": 2.722807502746582, "step": 59000 }, { "epoch": 0.47633654335138803, "grad_norm": 1.02605140209198, "learning_rate": 1.0480198379683853e-05, "loss": 2.9112289428710936, "step": 59010 }, { "epoch": 0.47641726467715506, "grad_norm": 0.6810266375541687, "learning_rate": 1.047858290994564e-05, "loss": 2.9820730209350588, "step": 59020 }, { "epoch": 0.4764979860029221, "grad_norm": 0.5109571218490601, "learning_rate": 1.0476967440207427e-05, "loss": 2.980790901184082, "step": 59030 }, { "epoch": 0.4765787073286892, "grad_norm": 0.9631869792938232, "learning_rate": 1.0475351970469214e-05, "loss": 3.239153289794922, "step": 59040 }, { "epoch": 0.4766594286544562, "grad_norm": 0.9777318835258484, "learning_rate": 1.0473736500731e-05, "loss": 2.7573266983032227, "step": 59050 }, { "epoch": 0.4767401499802233, "grad_norm": 1.1079927682876587, "learning_rate": 1.0472121030992788e-05, "loss": 3.249617004394531, "step": 59060 }, { "epoch": 0.4768208713059903, "grad_norm": 1.2592824697494507, "learning_rate": 1.0470505561254574e-05, "loss": 2.30045166015625, "step": 59070 }, { "epoch": 0.4769015926317574, "grad_norm": 0.8058668971061707, "learning_rate": 1.0468890091516362e-05, "loss": 2.870101547241211, "step": 59080 }, { "epoch": 0.47698231395752444, "grad_norm": 0.8820027112960815, "learning_rate": 1.0467274621778148e-05, "loss": 3.0217151641845703, "step": 59090 }, { "epoch": 0.47706303528329147, "grad_norm": 1.2703964710235596, "learning_rate": 1.0465659152039936e-05, "loss": 2.8856832504272463, "step": 59100 }, { "epoch": 0.47714375660905856, "grad_norm": 0.7814643979072571, "learning_rate": 1.0464043682301722e-05, "loss": 2.7228010177612303, "step": 59110 }, { "epoch": 0.4772244779348256, "grad_norm": 1.1355308294296265, "learning_rate": 1.046242821256351e-05, "loss": 2.7045902252197265, "step": 59120 }, { "epoch": 0.47730519926059267, "grad_norm": 0.7646715044975281, "learning_rate": 1.0460812742825295e-05, "loss": 2.7088451385498047, "step": 59130 }, { "epoch": 0.4773859205863597, "grad_norm": 0.7019559741020203, "learning_rate": 1.0459197273087083e-05, "loss": 2.9858707427978515, "step": 59140 }, { "epoch": 0.4774666419121268, "grad_norm": 0.6703602075576782, "learning_rate": 1.0457581803348869e-05, "loss": 2.567453956604004, "step": 59150 }, { "epoch": 0.4775473632378938, "grad_norm": 1.166040062904358, "learning_rate": 1.0455966333610657e-05, "loss": 2.74892635345459, "step": 59160 }, { "epoch": 0.47762808456366085, "grad_norm": 1.0575635433197021, "learning_rate": 1.0454350863872444e-05, "loss": 2.7855600357055663, "step": 59170 }, { "epoch": 0.47770880588942793, "grad_norm": 0.9711916446685791, "learning_rate": 1.045273539413423e-05, "loss": 4.218581008911133, "step": 59180 }, { "epoch": 0.47778952721519496, "grad_norm": 1.2780717611312866, "learning_rate": 1.0451119924396018e-05, "loss": 2.9719636917114256, "step": 59190 }, { "epoch": 0.47787024854096205, "grad_norm": 1.0059659481048584, "learning_rate": 1.0449504454657804e-05, "loss": 2.662223243713379, "step": 59200 }, { "epoch": 0.4779509698667291, "grad_norm": 2.185211420059204, "learning_rate": 1.0447888984919592e-05, "loss": 2.892727088928223, "step": 59210 }, { "epoch": 0.47803169119249617, "grad_norm": 0.7158305644989014, "learning_rate": 1.0446273515181378e-05, "loss": 2.8055763244628906, "step": 59220 }, { "epoch": 0.4781124125182632, "grad_norm": 0.6399632096290588, "learning_rate": 1.0444658045443165e-05, "loss": 2.658931922912598, "step": 59230 }, { "epoch": 0.4781931338440302, "grad_norm": 1.6383862495422363, "learning_rate": 1.0443042575704951e-05, "loss": 2.9095584869384767, "step": 59240 }, { "epoch": 0.4782738551697973, "grad_norm": 1.1010690927505493, "learning_rate": 1.0441427105966739e-05, "loss": 2.518229675292969, "step": 59250 }, { "epoch": 0.47835457649556434, "grad_norm": 0.9855228066444397, "learning_rate": 1.0439811636228525e-05, "loss": 2.813348579406738, "step": 59260 }, { "epoch": 0.47843529782133143, "grad_norm": 1.0820417404174805, "learning_rate": 1.0438196166490313e-05, "loss": 2.823236083984375, "step": 59270 }, { "epoch": 0.47851601914709846, "grad_norm": 0.9117831587791443, "learning_rate": 1.0436580696752099e-05, "loss": 2.8919933319091795, "step": 59280 }, { "epoch": 0.47859674047286554, "grad_norm": 2.02993106842041, "learning_rate": 1.0434965227013886e-05, "loss": 2.8648555755615233, "step": 59290 }, { "epoch": 0.4786774617986326, "grad_norm": 1.2516872882843018, "learning_rate": 1.0433349757275672e-05, "loss": 3.3142009735107423, "step": 59300 }, { "epoch": 0.47875818312439966, "grad_norm": 0.9905546307563782, "learning_rate": 1.043173428753746e-05, "loss": 2.508036804199219, "step": 59310 }, { "epoch": 0.4788389044501667, "grad_norm": 0.9240715503692627, "learning_rate": 1.0430118817799246e-05, "loss": 2.525678825378418, "step": 59320 }, { "epoch": 0.4789196257759337, "grad_norm": 0.6032970547676086, "learning_rate": 1.0428503348061034e-05, "loss": 3.136125946044922, "step": 59330 }, { "epoch": 0.4790003471017008, "grad_norm": 1.3135639429092407, "learning_rate": 1.042688787832282e-05, "loss": 3.124348258972168, "step": 59340 }, { "epoch": 0.47908106842746784, "grad_norm": 0.7473099231719971, "learning_rate": 1.0425272408584608e-05, "loss": 2.7179569244384765, "step": 59350 }, { "epoch": 0.4791617897532349, "grad_norm": 1.2392851114273071, "learning_rate": 1.0423656938846394e-05, "loss": 2.5174013137817384, "step": 59360 }, { "epoch": 0.47924251107900195, "grad_norm": 1.0608305931091309, "learning_rate": 1.0422041469108181e-05, "loss": 2.429733085632324, "step": 59370 }, { "epoch": 0.47932323240476904, "grad_norm": 1.011026382446289, "learning_rate": 1.0420425999369967e-05, "loss": 2.2638601303100585, "step": 59380 }, { "epoch": 0.47940395373053607, "grad_norm": 0.6638527512550354, "learning_rate": 1.0418810529631755e-05, "loss": 2.4486152648925783, "step": 59390 }, { "epoch": 0.4794846750563031, "grad_norm": 0.9259427189826965, "learning_rate": 1.0417195059893541e-05, "loss": 2.3707563400268556, "step": 59400 }, { "epoch": 0.4795653963820702, "grad_norm": 0.6273232102394104, "learning_rate": 1.0415579590155329e-05, "loss": 2.6725515365600585, "step": 59410 }, { "epoch": 0.4796461177078372, "grad_norm": 0.5615362524986267, "learning_rate": 1.0413964120417115e-05, "loss": 2.5959896087646483, "step": 59420 }, { "epoch": 0.4797268390336043, "grad_norm": 0.6281788945198059, "learning_rate": 1.0412348650678902e-05, "loss": 2.449250411987305, "step": 59430 }, { "epoch": 0.47980756035937133, "grad_norm": 1.3297046422958374, "learning_rate": 1.0410733180940688e-05, "loss": 3.024058723449707, "step": 59440 }, { "epoch": 0.4798882816851384, "grad_norm": 0.6760939359664917, "learning_rate": 1.0409117711202476e-05, "loss": 2.8233110427856447, "step": 59450 }, { "epoch": 0.47996900301090545, "grad_norm": 1.091226577758789, "learning_rate": 1.0407502241464262e-05, "loss": 3.012132453918457, "step": 59460 }, { "epoch": 0.4800497243366725, "grad_norm": 0.8319947123527527, "learning_rate": 1.040588677172605e-05, "loss": 3.0275129318237304, "step": 59470 }, { "epoch": 0.48013044566243956, "grad_norm": 1.311740517616272, "learning_rate": 1.0404271301987836e-05, "loss": 3.0804428100585937, "step": 59480 }, { "epoch": 0.4802111669882066, "grad_norm": 1.129347324371338, "learning_rate": 1.0402655832249623e-05, "loss": 2.6283416748046875, "step": 59490 }, { "epoch": 0.4802918883139737, "grad_norm": 1.1082643270492554, "learning_rate": 1.040104036251141e-05, "loss": 2.6290489196777345, "step": 59500 }, { "epoch": 0.4803726096397407, "grad_norm": 2.183232069015503, "learning_rate": 1.0399424892773197e-05, "loss": 2.5156492233276366, "step": 59510 }, { "epoch": 0.4804533309655078, "grad_norm": 1.0631145238876343, "learning_rate": 1.0397809423034983e-05, "loss": 2.907950592041016, "step": 59520 }, { "epoch": 0.4805340522912748, "grad_norm": 0.7427566647529602, "learning_rate": 1.039619395329677e-05, "loss": 2.8634267807006837, "step": 59530 }, { "epoch": 0.4806147736170419, "grad_norm": 0.7788955569267273, "learning_rate": 1.0394578483558557e-05, "loss": 3.3103710174560548, "step": 59540 }, { "epoch": 0.48069549494280894, "grad_norm": 1.1426273584365845, "learning_rate": 1.0392963013820344e-05, "loss": 2.529758834838867, "step": 59550 }, { "epoch": 0.480776216268576, "grad_norm": 0.9103429913520813, "learning_rate": 1.039134754408213e-05, "loss": 2.7681692123413084, "step": 59560 }, { "epoch": 0.48085693759434306, "grad_norm": 0.7763795256614685, "learning_rate": 1.0389732074343918e-05, "loss": 3.118657112121582, "step": 59570 }, { "epoch": 0.4809376589201101, "grad_norm": 1.622644066810608, "learning_rate": 1.0388116604605704e-05, "loss": 2.9149295806884767, "step": 59580 }, { "epoch": 0.4810183802458772, "grad_norm": 1.026768445968628, "learning_rate": 1.0386501134867492e-05, "loss": 3.0638113021850586, "step": 59590 }, { "epoch": 0.4810991015716442, "grad_norm": 0.935144305229187, "learning_rate": 1.0384885665129278e-05, "loss": 2.6504194259643556, "step": 59600 }, { "epoch": 0.4811798228974113, "grad_norm": 1.156906008720398, "learning_rate": 1.0383270195391067e-05, "loss": 2.569773483276367, "step": 59610 }, { "epoch": 0.4812605442231783, "grad_norm": 0.6840815544128418, "learning_rate": 1.0381654725652852e-05, "loss": 2.339614677429199, "step": 59620 }, { "epoch": 0.48134126554894535, "grad_norm": 1.601312518119812, "learning_rate": 1.0380039255914641e-05, "loss": 3.020941734313965, "step": 59630 }, { "epoch": 0.48142198687471244, "grad_norm": 0.6789114475250244, "learning_rate": 1.0378423786176425e-05, "loss": 3.2503719329833984, "step": 59640 }, { "epoch": 0.48150270820047947, "grad_norm": 0.6258717179298401, "learning_rate": 1.0376808316438215e-05, "loss": 2.565566062927246, "step": 59650 }, { "epoch": 0.48158342952624655, "grad_norm": 1.0577616691589355, "learning_rate": 1.0375192846699999e-05, "loss": 2.9531444549560546, "step": 59660 }, { "epoch": 0.4816641508520136, "grad_norm": 0.7949146032333374, "learning_rate": 1.0373577376961788e-05, "loss": 2.6222034454345704, "step": 59670 }, { "epoch": 0.48174487217778067, "grad_norm": 0.994010865688324, "learning_rate": 1.0371961907223573e-05, "loss": 2.675751495361328, "step": 59680 }, { "epoch": 0.4818255935035477, "grad_norm": 1.0972833633422852, "learning_rate": 1.0370346437485362e-05, "loss": 3.1860939025878907, "step": 59690 }, { "epoch": 0.48190631482931473, "grad_norm": 0.7506295442581177, "learning_rate": 1.0368730967747146e-05, "loss": 2.8863546371459963, "step": 59700 }, { "epoch": 0.4819870361550818, "grad_norm": 0.6949804425239563, "learning_rate": 1.0367115498008936e-05, "loss": 2.5684627532958983, "step": 59710 }, { "epoch": 0.48206775748084885, "grad_norm": 0.9945990443229675, "learning_rate": 1.036550002827072e-05, "loss": 2.670185661315918, "step": 59720 }, { "epoch": 0.48214847880661593, "grad_norm": 1.0512800216674805, "learning_rate": 1.036388455853251e-05, "loss": 2.939606475830078, "step": 59730 }, { "epoch": 0.48222920013238296, "grad_norm": 1.0082557201385498, "learning_rate": 1.0362269088794294e-05, "loss": 2.852046012878418, "step": 59740 }, { "epoch": 0.48230992145815005, "grad_norm": 0.768738329410553, "learning_rate": 1.0360653619056083e-05, "loss": 2.6859386444091795, "step": 59750 }, { "epoch": 0.4823906427839171, "grad_norm": 0.7150905728340149, "learning_rate": 1.0359038149317867e-05, "loss": 2.8352113723754884, "step": 59760 }, { "epoch": 0.48247136410968416, "grad_norm": 1.0093064308166504, "learning_rate": 1.0357422679579657e-05, "loss": 2.796866607666016, "step": 59770 }, { "epoch": 0.4825520854354512, "grad_norm": 0.7974266409873962, "learning_rate": 1.0355807209841443e-05, "loss": 2.5858970642089845, "step": 59780 }, { "epoch": 0.4826328067612182, "grad_norm": 0.803239107131958, "learning_rate": 1.035419174010323e-05, "loss": 3.0547840118408205, "step": 59790 }, { "epoch": 0.4827135280869853, "grad_norm": 0.6779952049255371, "learning_rate": 1.0352576270365016e-05, "loss": 2.874345588684082, "step": 59800 }, { "epoch": 0.48279424941275234, "grad_norm": 1.2098647356033325, "learning_rate": 1.0350960800626804e-05, "loss": 2.9570404052734376, "step": 59810 }, { "epoch": 0.4828749707385194, "grad_norm": 1.0051771402359009, "learning_rate": 1.034934533088859e-05, "loss": 3.058750534057617, "step": 59820 }, { "epoch": 0.48295569206428646, "grad_norm": 0.6230194568634033, "learning_rate": 1.0347729861150378e-05, "loss": 2.857069396972656, "step": 59830 }, { "epoch": 0.48303641339005354, "grad_norm": 0.9777218103408813, "learning_rate": 1.0346114391412164e-05, "loss": 2.606494903564453, "step": 59840 }, { "epoch": 0.4831171347158206, "grad_norm": 0.7278450727462769, "learning_rate": 1.0344498921673952e-05, "loss": 2.895609664916992, "step": 59850 }, { "epoch": 0.4831978560415876, "grad_norm": 1.219617247581482, "learning_rate": 1.0342883451935738e-05, "loss": 3.5260894775390623, "step": 59860 }, { "epoch": 0.4832785773673547, "grad_norm": 0.8626509308815002, "learning_rate": 1.0341267982197525e-05, "loss": 2.8078388214111327, "step": 59870 }, { "epoch": 0.4833592986931217, "grad_norm": 0.6728366017341614, "learning_rate": 1.0339652512459311e-05, "loss": 2.8898319244384765, "step": 59880 }, { "epoch": 0.4834400200188888, "grad_norm": 0.8504734635353088, "learning_rate": 1.0338037042721099e-05, "loss": 3.0738151550292967, "step": 59890 }, { "epoch": 0.48352074134465584, "grad_norm": 0.9986091256141663, "learning_rate": 1.0336421572982885e-05, "loss": 2.649928092956543, "step": 59900 }, { "epoch": 0.4836014626704229, "grad_norm": 0.8051332235336304, "learning_rate": 1.0334806103244673e-05, "loss": 3.627197265625, "step": 59910 }, { "epoch": 0.48368218399618995, "grad_norm": 0.6830031871795654, "learning_rate": 1.0333190633506459e-05, "loss": 2.8581552505493164, "step": 59920 }, { "epoch": 0.483762905321957, "grad_norm": 1.256439447402954, "learning_rate": 1.0331575163768246e-05, "loss": 2.6988901138305663, "step": 59930 }, { "epoch": 0.48384362664772407, "grad_norm": 1.0505154132843018, "learning_rate": 1.0329959694030032e-05, "loss": 3.2028171539306642, "step": 59940 }, { "epoch": 0.4839243479734911, "grad_norm": 0.8209861516952515, "learning_rate": 1.032834422429182e-05, "loss": 2.6025630950927736, "step": 59950 }, { "epoch": 0.4840050692992582, "grad_norm": 1.7065757513046265, "learning_rate": 1.0326728754553606e-05, "loss": 3.1284255981445312, "step": 59960 }, { "epoch": 0.4840857906250252, "grad_norm": 0.9451683759689331, "learning_rate": 1.0325113284815394e-05, "loss": 2.792275810241699, "step": 59970 }, { "epoch": 0.4841665119507923, "grad_norm": 1.1051443815231323, "learning_rate": 1.032349781507718e-05, "loss": 2.60229606628418, "step": 59980 }, { "epoch": 0.48424723327655933, "grad_norm": 6.280653476715088, "learning_rate": 1.0321882345338967e-05, "loss": 3.044937515258789, "step": 59990 }, { "epoch": 0.4843279546023264, "grad_norm": 1.0019736289978027, "learning_rate": 1.0320266875600753e-05, "loss": 2.464058494567871, "step": 60000 }, { "epoch": 0.48440867592809345, "grad_norm": 0.9862858653068542, "learning_rate": 1.0318651405862541e-05, "loss": 3.0322729110717774, "step": 60010 }, { "epoch": 0.4844893972538605, "grad_norm": 1.565399408340454, "learning_rate": 1.0317035936124327e-05, "loss": 2.870064353942871, "step": 60020 }, { "epoch": 0.48457011857962756, "grad_norm": 0.9717642068862915, "learning_rate": 1.0315420466386115e-05, "loss": 2.8106075286865235, "step": 60030 }, { "epoch": 0.4846508399053946, "grad_norm": 0.9642913937568665, "learning_rate": 1.03138049966479e-05, "loss": 3.3379844665527343, "step": 60040 }, { "epoch": 0.4847315612311617, "grad_norm": 0.8099586367607117, "learning_rate": 1.0312189526909688e-05, "loss": 2.604165267944336, "step": 60050 }, { "epoch": 0.4848122825569287, "grad_norm": 1.3400229215621948, "learning_rate": 1.0310574057171474e-05, "loss": 2.6017782211303713, "step": 60060 }, { "epoch": 0.4848930038826958, "grad_norm": 0.7264768481254578, "learning_rate": 1.0308958587433262e-05, "loss": 2.7769765853881836, "step": 60070 }, { "epoch": 0.4849737252084628, "grad_norm": 0.586677610874176, "learning_rate": 1.0307343117695048e-05, "loss": 2.3670230865478517, "step": 60080 }, { "epoch": 0.48505444653422986, "grad_norm": 0.5584161877632141, "learning_rate": 1.0305727647956836e-05, "loss": 2.769565200805664, "step": 60090 }, { "epoch": 0.48513516785999694, "grad_norm": 0.8370271325111389, "learning_rate": 1.0304112178218622e-05, "loss": 2.583776664733887, "step": 60100 }, { "epoch": 0.48521588918576397, "grad_norm": 0.870931088924408, "learning_rate": 1.030249670848041e-05, "loss": 3.0729480743408204, "step": 60110 }, { "epoch": 0.48529661051153106, "grad_norm": 0.9265768527984619, "learning_rate": 1.0300881238742196e-05, "loss": 2.5426891326904295, "step": 60120 }, { "epoch": 0.4853773318372981, "grad_norm": 1.4000142812728882, "learning_rate": 1.0299265769003983e-05, "loss": 2.9457292556762695, "step": 60130 }, { "epoch": 0.4854580531630652, "grad_norm": 0.6553357243537903, "learning_rate": 1.029765029926577e-05, "loss": 3.2167510986328125, "step": 60140 }, { "epoch": 0.4855387744888322, "grad_norm": 0.7912943363189697, "learning_rate": 1.0296034829527557e-05, "loss": 2.905523109436035, "step": 60150 }, { "epoch": 0.48561949581459923, "grad_norm": 1.089218020439148, "learning_rate": 1.0294419359789343e-05, "loss": 2.6482025146484376, "step": 60160 }, { "epoch": 0.4857002171403663, "grad_norm": 0.6884830594062805, "learning_rate": 1.029280389005113e-05, "loss": 2.6533233642578127, "step": 60170 }, { "epoch": 0.48578093846613335, "grad_norm": 1.2069522142410278, "learning_rate": 1.0291188420312917e-05, "loss": 2.242095184326172, "step": 60180 }, { "epoch": 0.48586165979190044, "grad_norm": 0.9960156083106995, "learning_rate": 1.0289572950574704e-05, "loss": 2.9667964935302735, "step": 60190 }, { "epoch": 0.48594238111766747, "grad_norm": 1.0242843627929688, "learning_rate": 1.028795748083649e-05, "loss": 2.7858131408691404, "step": 60200 }, { "epoch": 0.48602310244343455, "grad_norm": 0.5898187160491943, "learning_rate": 1.0286342011098278e-05, "loss": 2.8730304718017576, "step": 60210 }, { "epoch": 0.4861038237692016, "grad_norm": 1.4033761024475098, "learning_rate": 1.0284726541360064e-05, "loss": 2.8243703842163086, "step": 60220 }, { "epoch": 0.4861845450949686, "grad_norm": 1.4434517621994019, "learning_rate": 1.0283111071621852e-05, "loss": 2.819189453125, "step": 60230 }, { "epoch": 0.4862652664207357, "grad_norm": 0.9201036095619202, "learning_rate": 1.0281495601883638e-05, "loss": 3.0469968795776365, "step": 60240 }, { "epoch": 0.48634598774650273, "grad_norm": 0.7933660745620728, "learning_rate": 1.0279880132145425e-05, "loss": 2.7839088439941406, "step": 60250 }, { "epoch": 0.4864267090722698, "grad_norm": 1.189392328262329, "learning_rate": 1.0278264662407211e-05, "loss": 3.4440147399902346, "step": 60260 }, { "epoch": 0.48650743039803684, "grad_norm": 1.006445050239563, "learning_rate": 1.0276649192668999e-05, "loss": 3.138945198059082, "step": 60270 }, { "epoch": 0.48658815172380393, "grad_norm": 0.8107032179832458, "learning_rate": 1.0275033722930785e-05, "loss": 3.1816051483154295, "step": 60280 }, { "epoch": 0.48666887304957096, "grad_norm": 0.6967210173606873, "learning_rate": 1.0273418253192573e-05, "loss": 3.1770292282104493, "step": 60290 }, { "epoch": 0.48674959437533805, "grad_norm": 1.4045720100402832, "learning_rate": 1.0271802783454359e-05, "loss": 2.7220834732055663, "step": 60300 }, { "epoch": 0.4868303157011051, "grad_norm": 0.6831101179122925, "learning_rate": 1.0270187313716146e-05, "loss": 2.7099868774414064, "step": 60310 }, { "epoch": 0.4869110370268721, "grad_norm": 1.2843279838562012, "learning_rate": 1.0268571843977932e-05, "loss": 2.4312543869018555, "step": 60320 }, { "epoch": 0.4869917583526392, "grad_norm": 0.8265565633773804, "learning_rate": 1.026695637423972e-05, "loss": 2.8535470962524414, "step": 60330 }, { "epoch": 0.4870724796784062, "grad_norm": 0.8484578132629395, "learning_rate": 1.0265340904501506e-05, "loss": 2.761365509033203, "step": 60340 }, { "epoch": 0.4871532010041733, "grad_norm": 0.9384176731109619, "learning_rate": 1.0263725434763294e-05, "loss": 2.8911405563354493, "step": 60350 }, { "epoch": 0.48723392232994034, "grad_norm": 0.9139359593391418, "learning_rate": 1.026210996502508e-05, "loss": 2.8311670303344725, "step": 60360 }, { "epoch": 0.4873146436557074, "grad_norm": 0.5714908242225647, "learning_rate": 1.0260494495286868e-05, "loss": 2.553857612609863, "step": 60370 }, { "epoch": 0.48739536498147445, "grad_norm": 0.7487890124320984, "learning_rate": 1.0258879025548654e-05, "loss": 2.8242767333984373, "step": 60380 }, { "epoch": 0.4874760863072415, "grad_norm": 0.6273084878921509, "learning_rate": 1.0257263555810441e-05, "loss": 3.4769325256347656, "step": 60390 }, { "epoch": 0.48755680763300857, "grad_norm": 0.7770223021507263, "learning_rate": 1.0255648086072227e-05, "loss": 2.9821754455566407, "step": 60400 }, { "epoch": 0.4876375289587756, "grad_norm": 1.070556879043579, "learning_rate": 1.0254032616334015e-05, "loss": 3.193729209899902, "step": 60410 }, { "epoch": 0.4877182502845427, "grad_norm": 1.010111927986145, "learning_rate": 1.0252417146595803e-05, "loss": 2.7104400634765624, "step": 60420 }, { "epoch": 0.4877989716103097, "grad_norm": 0.6391497254371643, "learning_rate": 1.0250801676857589e-05, "loss": 2.872890663146973, "step": 60430 }, { "epoch": 0.4878796929360768, "grad_norm": 0.9902772903442383, "learning_rate": 1.0249186207119376e-05, "loss": 2.501823616027832, "step": 60440 }, { "epoch": 0.48796041426184383, "grad_norm": 1.1255745887756348, "learning_rate": 1.0247570737381162e-05, "loss": 2.451732063293457, "step": 60450 }, { "epoch": 0.48804113558761086, "grad_norm": 1.2503312826156616, "learning_rate": 1.024595526764295e-05, "loss": 3.162103462219238, "step": 60460 }, { "epoch": 0.48812185691337795, "grad_norm": 0.8494945764541626, "learning_rate": 1.0244339797904736e-05, "loss": 2.804221343994141, "step": 60470 }, { "epoch": 0.488202578239145, "grad_norm": 1.2897058725357056, "learning_rate": 1.0242724328166525e-05, "loss": 2.737710189819336, "step": 60480 }, { "epoch": 0.48828329956491207, "grad_norm": 0.9122979044914246, "learning_rate": 1.024110885842831e-05, "loss": 2.742757797241211, "step": 60490 }, { "epoch": 0.4883640208906791, "grad_norm": 0.6423544883728027, "learning_rate": 1.0239493388690099e-05, "loss": 2.7961071014404295, "step": 60500 }, { "epoch": 0.4884447422164462, "grad_norm": 0.8781695365905762, "learning_rate": 1.0237877918951883e-05, "loss": 2.7477136611938477, "step": 60510 }, { "epoch": 0.4885254635422132, "grad_norm": 1.139112114906311, "learning_rate": 1.0236262449213673e-05, "loss": 2.637731742858887, "step": 60520 }, { "epoch": 0.4886061848679803, "grad_norm": 1.0054855346679688, "learning_rate": 1.0234646979475457e-05, "loss": 2.4311893463134764, "step": 60530 }, { "epoch": 0.48868690619374733, "grad_norm": 1.1124709844589233, "learning_rate": 1.0233031509737246e-05, "loss": 2.646858978271484, "step": 60540 }, { "epoch": 0.48876762751951436, "grad_norm": 1.0830378532409668, "learning_rate": 1.023141603999903e-05, "loss": 2.6016027450561525, "step": 60550 }, { "epoch": 0.48884834884528144, "grad_norm": 1.199313998222351, "learning_rate": 1.022980057026082e-05, "loss": 2.537458038330078, "step": 60560 }, { "epoch": 0.4889290701710485, "grad_norm": 1.1362253427505493, "learning_rate": 1.0228185100522604e-05, "loss": 2.5508955001831053, "step": 60570 }, { "epoch": 0.48900979149681556, "grad_norm": 0.8929249048233032, "learning_rate": 1.0226569630784394e-05, "loss": 2.687466049194336, "step": 60580 }, { "epoch": 0.4890905128225826, "grad_norm": 1.0058859586715698, "learning_rate": 1.0224954161046178e-05, "loss": 2.435923194885254, "step": 60590 }, { "epoch": 0.4891712341483497, "grad_norm": 0.8203619718551636, "learning_rate": 1.0223338691307967e-05, "loss": 2.9615451812744142, "step": 60600 }, { "epoch": 0.4892519554741167, "grad_norm": 0.9237666726112366, "learning_rate": 1.0221723221569752e-05, "loss": 2.9667837142944338, "step": 60610 }, { "epoch": 0.48933267679988374, "grad_norm": 0.9731907844543457, "learning_rate": 1.0220107751831541e-05, "loss": 2.8216888427734377, "step": 60620 }, { "epoch": 0.4894133981256508, "grad_norm": 0.6717122793197632, "learning_rate": 1.0218492282093325e-05, "loss": 2.3492183685302734, "step": 60630 }, { "epoch": 0.48949411945141785, "grad_norm": 1.1109510660171509, "learning_rate": 1.0216876812355115e-05, "loss": 2.6787908554077147, "step": 60640 }, { "epoch": 0.48957484077718494, "grad_norm": 0.7634261846542358, "learning_rate": 1.0215261342616901e-05, "loss": 3.158920097351074, "step": 60650 }, { "epoch": 0.48965556210295197, "grad_norm": 1.0082714557647705, "learning_rate": 1.0213645872878689e-05, "loss": 3.170565605163574, "step": 60660 }, { "epoch": 0.48973628342871905, "grad_norm": 1.0845388174057007, "learning_rate": 1.0212030403140475e-05, "loss": 3.1764110565185546, "step": 60670 }, { "epoch": 0.4898170047544861, "grad_norm": 1.065975308418274, "learning_rate": 1.0210414933402262e-05, "loss": 2.839346694946289, "step": 60680 }, { "epoch": 0.4898977260802531, "grad_norm": 1.5271354913711548, "learning_rate": 1.0208799463664048e-05, "loss": 2.987993621826172, "step": 60690 }, { "epoch": 0.4899784474060202, "grad_norm": 0.6530702114105225, "learning_rate": 1.0207183993925836e-05, "loss": 3.621257781982422, "step": 60700 }, { "epoch": 0.49005916873178723, "grad_norm": 1.278372883796692, "learning_rate": 1.0205568524187622e-05, "loss": 3.2460155487060547, "step": 60710 }, { "epoch": 0.4901398900575543, "grad_norm": 0.8927082419395447, "learning_rate": 1.020395305444941e-05, "loss": 3.2531917572021483, "step": 60720 }, { "epoch": 0.49022061138332135, "grad_norm": 0.9379038214683533, "learning_rate": 1.0202337584711196e-05, "loss": 3.0437995910644533, "step": 60730 }, { "epoch": 0.49030133270908843, "grad_norm": 1.1750729084014893, "learning_rate": 1.0200722114972983e-05, "loss": 2.494411659240723, "step": 60740 }, { "epoch": 0.49038205403485546, "grad_norm": 0.9715591669082642, "learning_rate": 1.019910664523477e-05, "loss": 2.702816963195801, "step": 60750 }, { "epoch": 0.49046277536062255, "grad_norm": 0.7636282444000244, "learning_rate": 1.0197491175496557e-05, "loss": 3.128565788269043, "step": 60760 }, { "epoch": 0.4905434966863896, "grad_norm": 1.0207723379135132, "learning_rate": 1.0195875705758343e-05, "loss": 2.8281744003295897, "step": 60770 }, { "epoch": 0.4906242180121566, "grad_norm": 1.031900405883789, "learning_rate": 1.019426023602013e-05, "loss": 2.7780893325805662, "step": 60780 }, { "epoch": 0.4907049393379237, "grad_norm": 0.6849764585494995, "learning_rate": 1.0192644766281917e-05, "loss": 2.441689872741699, "step": 60790 }, { "epoch": 0.4907856606636907, "grad_norm": 0.6048141121864319, "learning_rate": 1.0191029296543704e-05, "loss": 3.0135524749755858, "step": 60800 }, { "epoch": 0.4908663819894578, "grad_norm": 1.8278990983963013, "learning_rate": 1.018941382680549e-05, "loss": 2.769467353820801, "step": 60810 }, { "epoch": 0.49094710331522484, "grad_norm": 0.9975041151046753, "learning_rate": 1.0187798357067278e-05, "loss": 2.7050100326538087, "step": 60820 }, { "epoch": 0.49102782464099193, "grad_norm": 1.0376653671264648, "learning_rate": 1.0186182887329064e-05, "loss": 2.808544731140137, "step": 60830 }, { "epoch": 0.49110854596675896, "grad_norm": 0.6012643575668335, "learning_rate": 1.0184567417590852e-05, "loss": 2.8611581802368162, "step": 60840 }, { "epoch": 0.491189267292526, "grad_norm": 0.889697253704071, "learning_rate": 1.0182951947852638e-05, "loss": 2.4778348922729494, "step": 60850 }, { "epoch": 0.4912699886182931, "grad_norm": 0.9550177454948425, "learning_rate": 1.0181336478114425e-05, "loss": 3.016082763671875, "step": 60860 }, { "epoch": 0.4913507099440601, "grad_norm": 1.0207329988479614, "learning_rate": 1.0179721008376211e-05, "loss": 3.00289249420166, "step": 60870 }, { "epoch": 0.4914314312698272, "grad_norm": 0.825486958026886, "learning_rate": 1.0178105538638e-05, "loss": 2.5629905700683593, "step": 60880 }, { "epoch": 0.4915121525955942, "grad_norm": 0.9878257513046265, "learning_rate": 1.0176490068899785e-05, "loss": 2.4391141891479493, "step": 60890 }, { "epoch": 0.4915928739213613, "grad_norm": 1.1372835636138916, "learning_rate": 1.0174874599161573e-05, "loss": 2.7596826553344727, "step": 60900 }, { "epoch": 0.49167359524712834, "grad_norm": 0.7725165486335754, "learning_rate": 1.0173259129423359e-05, "loss": 3.0503442764282225, "step": 60910 }, { "epoch": 0.49175431657289537, "grad_norm": 1.4566727876663208, "learning_rate": 1.0171643659685147e-05, "loss": 3.2753662109375, "step": 60920 }, { "epoch": 0.49183503789866245, "grad_norm": 0.8149808049201965, "learning_rate": 1.0170028189946933e-05, "loss": 2.960079002380371, "step": 60930 }, { "epoch": 0.4919157592244295, "grad_norm": 0.7997254133224487, "learning_rate": 1.016841272020872e-05, "loss": 2.999393653869629, "step": 60940 }, { "epoch": 0.49199648055019657, "grad_norm": 0.7194298505783081, "learning_rate": 1.0166797250470506e-05, "loss": 2.5222705841064452, "step": 60950 }, { "epoch": 0.4920772018759636, "grad_norm": 1.4216326475143433, "learning_rate": 1.0165181780732294e-05, "loss": 2.85279541015625, "step": 60960 }, { "epoch": 0.4921579232017307, "grad_norm": 0.732158362865448, "learning_rate": 1.016356631099408e-05, "loss": 2.543519973754883, "step": 60970 }, { "epoch": 0.4922386445274977, "grad_norm": 1.5935685634613037, "learning_rate": 1.0161950841255868e-05, "loss": 2.740309715270996, "step": 60980 }, { "epoch": 0.4923193658532648, "grad_norm": 1.6726630926132202, "learning_rate": 1.0160335371517654e-05, "loss": 2.8896718978881837, "step": 60990 }, { "epoch": 0.49240008717903183, "grad_norm": 1.0386505126953125, "learning_rate": 1.0158719901779441e-05, "loss": 2.8595712661743162, "step": 61000 }, { "epoch": 0.49248080850479886, "grad_norm": 0.7942756414413452, "learning_rate": 1.0157104432041227e-05, "loss": 2.812873649597168, "step": 61010 }, { "epoch": 0.49256152983056595, "grad_norm": 1.0145399570465088, "learning_rate": 1.0155488962303015e-05, "loss": 2.5677371978759767, "step": 61020 }, { "epoch": 0.492642251156333, "grad_norm": 1.269932508468628, "learning_rate": 1.0153873492564801e-05, "loss": 2.769037055969238, "step": 61030 }, { "epoch": 0.49272297248210006, "grad_norm": 1.0355931520462036, "learning_rate": 1.0152258022826589e-05, "loss": 2.630269432067871, "step": 61040 }, { "epoch": 0.4928036938078671, "grad_norm": 1.0234678983688354, "learning_rate": 1.0150642553088375e-05, "loss": 2.8803977966308594, "step": 61050 }, { "epoch": 0.4928844151336342, "grad_norm": 1.0462956428527832, "learning_rate": 1.0149027083350162e-05, "loss": 2.629182243347168, "step": 61060 }, { "epoch": 0.4929651364594012, "grad_norm": 0.8112270832061768, "learning_rate": 1.0147411613611948e-05, "loss": 2.54180793762207, "step": 61070 }, { "epoch": 0.49304585778516824, "grad_norm": 0.9541370868682861, "learning_rate": 1.0145796143873736e-05, "loss": 2.6051219940185546, "step": 61080 }, { "epoch": 0.4931265791109353, "grad_norm": 1.2561020851135254, "learning_rate": 1.0144180674135522e-05, "loss": 3.2099109649658204, "step": 61090 }, { "epoch": 0.49320730043670236, "grad_norm": 1.1286365985870361, "learning_rate": 1.014256520439731e-05, "loss": 2.8595685958862305, "step": 61100 }, { "epoch": 0.49328802176246944, "grad_norm": 0.9132683277130127, "learning_rate": 1.0140949734659096e-05, "loss": 2.551462173461914, "step": 61110 }, { "epoch": 0.49336874308823647, "grad_norm": 0.9392513632774353, "learning_rate": 1.0139334264920883e-05, "loss": 2.7958662033081056, "step": 61120 }, { "epoch": 0.49344946441400356, "grad_norm": 0.7953788042068481, "learning_rate": 1.013771879518267e-05, "loss": 3.1053899765014648, "step": 61130 }, { "epoch": 0.4935301857397706, "grad_norm": 0.8588997721672058, "learning_rate": 1.0136103325444457e-05, "loss": 2.8649574279785157, "step": 61140 }, { "epoch": 0.4936109070655376, "grad_norm": 1.439433217048645, "learning_rate": 1.0134487855706243e-05, "loss": 3.001141357421875, "step": 61150 }, { "epoch": 0.4936916283913047, "grad_norm": 0.9487671256065369, "learning_rate": 1.0132872385968031e-05, "loss": 2.7954845428466797, "step": 61160 }, { "epoch": 0.49377234971707173, "grad_norm": 1.4679373502731323, "learning_rate": 1.0131256916229817e-05, "loss": 2.37302188873291, "step": 61170 }, { "epoch": 0.4938530710428388, "grad_norm": 0.7688513994216919, "learning_rate": 1.0129641446491605e-05, "loss": 2.841866302490234, "step": 61180 }, { "epoch": 0.49393379236860585, "grad_norm": 1.181761622428894, "learning_rate": 1.012802597675339e-05, "loss": 2.6295610427856446, "step": 61190 }, { "epoch": 0.49401451369437294, "grad_norm": 0.5867567658424377, "learning_rate": 1.0126410507015178e-05, "loss": 2.6555261611938477, "step": 61200 }, { "epoch": 0.49409523502013997, "grad_norm": 1.5790458917617798, "learning_rate": 1.0124795037276964e-05, "loss": 2.7983978271484373, "step": 61210 }, { "epoch": 0.49417595634590705, "grad_norm": 1.0961873531341553, "learning_rate": 1.0123179567538752e-05, "loss": 2.820884132385254, "step": 61220 }, { "epoch": 0.4942566776716741, "grad_norm": 1.076186180114746, "learning_rate": 1.0121564097800538e-05, "loss": 2.9751201629638673, "step": 61230 }, { "epoch": 0.4943373989974411, "grad_norm": 0.932074248790741, "learning_rate": 1.0119948628062326e-05, "loss": 2.535609245300293, "step": 61240 }, { "epoch": 0.4944181203232082, "grad_norm": 0.8521384000778198, "learning_rate": 1.0118333158324112e-05, "loss": 3.110645294189453, "step": 61250 }, { "epoch": 0.49449884164897523, "grad_norm": 1.8026214838027954, "learning_rate": 1.01167176885859e-05, "loss": 2.8368078231811524, "step": 61260 }, { "epoch": 0.4945795629747423, "grad_norm": 1.5781662464141846, "learning_rate": 1.0115102218847685e-05, "loss": 2.846211242675781, "step": 61270 }, { "epoch": 0.49466028430050935, "grad_norm": 1.062313437461853, "learning_rate": 1.0113486749109473e-05, "loss": 2.9343544006347657, "step": 61280 }, { "epoch": 0.49474100562627643, "grad_norm": 1.001001238822937, "learning_rate": 1.0111871279371259e-05, "loss": 2.691153717041016, "step": 61290 }, { "epoch": 0.49482172695204346, "grad_norm": 0.5953100919723511, "learning_rate": 1.0110255809633047e-05, "loss": 2.6592498779296876, "step": 61300 }, { "epoch": 0.4949024482778105, "grad_norm": 1.6139811277389526, "learning_rate": 1.0108640339894833e-05, "loss": 2.673503303527832, "step": 61310 }, { "epoch": 0.4949831696035776, "grad_norm": 1.7196025848388672, "learning_rate": 1.010702487015662e-05, "loss": 3.0454755783081056, "step": 61320 }, { "epoch": 0.4950638909293446, "grad_norm": 0.6459023952484131, "learning_rate": 1.0105409400418406e-05, "loss": 2.5472816467285155, "step": 61330 }, { "epoch": 0.4951446122551117, "grad_norm": 0.5738323926925659, "learning_rate": 1.0103793930680194e-05, "loss": 2.5221202850341795, "step": 61340 }, { "epoch": 0.4952253335808787, "grad_norm": 0.6998001933097839, "learning_rate": 1.010217846094198e-05, "loss": 2.7364643096923826, "step": 61350 }, { "epoch": 0.4953060549066458, "grad_norm": 0.699435293674469, "learning_rate": 1.0100562991203768e-05, "loss": 2.878843879699707, "step": 61360 }, { "epoch": 0.49538677623241284, "grad_norm": 0.9226252436637878, "learning_rate": 1.0098947521465554e-05, "loss": 3.012613296508789, "step": 61370 }, { "epoch": 0.49546749755817987, "grad_norm": 0.8208152055740356, "learning_rate": 1.0097332051727341e-05, "loss": 2.920954704284668, "step": 61380 }, { "epoch": 0.49554821888394696, "grad_norm": 0.8180087208747864, "learning_rate": 1.0095716581989127e-05, "loss": 2.4965314865112305, "step": 61390 }, { "epoch": 0.495628940209714, "grad_norm": 0.5559254884719849, "learning_rate": 1.0094101112250915e-05, "loss": 2.452792167663574, "step": 61400 }, { "epoch": 0.49570966153548107, "grad_norm": 1.1884254217147827, "learning_rate": 1.0092485642512701e-05, "loss": 2.608428192138672, "step": 61410 }, { "epoch": 0.4957903828612481, "grad_norm": 0.8733107447624207, "learning_rate": 1.0090870172774489e-05, "loss": 2.7962860107421874, "step": 61420 }, { "epoch": 0.4958711041870152, "grad_norm": 0.6392911076545715, "learning_rate": 1.0089254703036275e-05, "loss": 2.8970340728759765, "step": 61430 }, { "epoch": 0.4959518255127822, "grad_norm": 1.2658188343048096, "learning_rate": 1.0087639233298063e-05, "loss": 2.8792919158935546, "step": 61440 }, { "epoch": 0.49603254683854925, "grad_norm": 0.7228187918663025, "learning_rate": 1.0086023763559849e-05, "loss": 3.3853176116943358, "step": 61450 }, { "epoch": 0.49611326816431633, "grad_norm": 0.7496005296707153, "learning_rate": 1.0084408293821636e-05, "loss": 2.7365909576416017, "step": 61460 }, { "epoch": 0.49619398949008336, "grad_norm": 0.6606804728507996, "learning_rate": 1.0082792824083422e-05, "loss": 2.666951560974121, "step": 61470 }, { "epoch": 0.49627471081585045, "grad_norm": 1.6267731189727783, "learning_rate": 1.008117735434521e-05, "loss": 2.6783473968505858, "step": 61480 }, { "epoch": 0.4963554321416175, "grad_norm": 0.9428319334983826, "learning_rate": 1.0079561884606996e-05, "loss": 3.2217052459716795, "step": 61490 }, { "epoch": 0.49643615346738457, "grad_norm": 0.9878536462783813, "learning_rate": 1.0077946414868784e-05, "loss": 3.692885971069336, "step": 61500 }, { "epoch": 0.4965168747931516, "grad_norm": 0.7823271751403809, "learning_rate": 1.007633094513057e-05, "loss": 2.604449653625488, "step": 61510 }, { "epoch": 0.4965975961189187, "grad_norm": 0.9454875588417053, "learning_rate": 1.0074715475392359e-05, "loss": 3.040410041809082, "step": 61520 }, { "epoch": 0.4966783174446857, "grad_norm": 0.9363090991973877, "learning_rate": 1.0073100005654143e-05, "loss": 2.744161605834961, "step": 61530 }, { "epoch": 0.49675903877045274, "grad_norm": 0.9758971333503723, "learning_rate": 1.0071484535915933e-05, "loss": 3.4173999786376954, "step": 61540 }, { "epoch": 0.49683976009621983, "grad_norm": 0.8871788382530212, "learning_rate": 1.0069869066177717e-05, "loss": 2.8492013931274416, "step": 61550 }, { "epoch": 0.49692048142198686, "grad_norm": 0.8855471014976501, "learning_rate": 1.0068253596439506e-05, "loss": 2.962565803527832, "step": 61560 }, { "epoch": 0.49700120274775395, "grad_norm": 1.7277417182922363, "learning_rate": 1.006663812670129e-05, "loss": 2.7782787322998046, "step": 61570 }, { "epoch": 0.497081924073521, "grad_norm": 1.1762797832489014, "learning_rate": 1.006502265696308e-05, "loss": 2.553291130065918, "step": 61580 }, { "epoch": 0.49716264539928806, "grad_norm": 1.0977249145507812, "learning_rate": 1.0063407187224864e-05, "loss": 2.9059099197387694, "step": 61590 }, { "epoch": 0.4972433667250551, "grad_norm": 1.4018906354904175, "learning_rate": 1.0061791717486654e-05, "loss": 2.872258949279785, "step": 61600 }, { "epoch": 0.4973240880508221, "grad_norm": 1.377031922340393, "learning_rate": 1.0060176247748438e-05, "loss": 2.589305877685547, "step": 61610 }, { "epoch": 0.4974048093765892, "grad_norm": 0.7349371910095215, "learning_rate": 1.0058560778010227e-05, "loss": 3.166016387939453, "step": 61620 }, { "epoch": 0.49748553070235624, "grad_norm": 0.9884769916534424, "learning_rate": 1.0056945308272012e-05, "loss": 2.641594886779785, "step": 61630 }, { "epoch": 0.4975662520281233, "grad_norm": 0.9023275375366211, "learning_rate": 1.0055329838533801e-05, "loss": 2.524001884460449, "step": 61640 }, { "epoch": 0.49764697335389035, "grad_norm": 0.5635959506034851, "learning_rate": 1.0053714368795585e-05, "loss": 2.8877841949462892, "step": 61650 }, { "epoch": 0.49772769467965744, "grad_norm": 1.0291013717651367, "learning_rate": 1.0052098899057375e-05, "loss": 2.6295696258544923, "step": 61660 }, { "epoch": 0.49780841600542447, "grad_norm": 1.035291075706482, "learning_rate": 1.0050483429319163e-05, "loss": 2.756102752685547, "step": 61670 }, { "epoch": 0.4978891373311915, "grad_norm": 0.8770505785942078, "learning_rate": 1.0048867959580949e-05, "loss": 2.947520446777344, "step": 61680 }, { "epoch": 0.4979698586569586, "grad_norm": 1.293755292892456, "learning_rate": 1.0047252489842736e-05, "loss": 3.388885498046875, "step": 61690 }, { "epoch": 0.4980505799827256, "grad_norm": 0.672175943851471, "learning_rate": 1.0045637020104522e-05, "loss": 2.446114730834961, "step": 61700 }, { "epoch": 0.4981313013084927, "grad_norm": 2.4545345306396484, "learning_rate": 1.004402155036631e-05, "loss": 2.7183660507202148, "step": 61710 }, { "epoch": 0.49821202263425973, "grad_norm": 0.7559486627578735, "learning_rate": 1.0042406080628096e-05, "loss": 3.3453330993652344, "step": 61720 }, { "epoch": 0.4982927439600268, "grad_norm": 1.2544795274734497, "learning_rate": 1.0040790610889884e-05, "loss": 3.1849847793579102, "step": 61730 }, { "epoch": 0.49837346528579385, "grad_norm": 0.6490107178688049, "learning_rate": 1.003917514115167e-05, "loss": 2.456489372253418, "step": 61740 }, { "epoch": 0.49845418661156093, "grad_norm": 0.8561004996299744, "learning_rate": 1.0037559671413457e-05, "loss": 2.698515510559082, "step": 61750 }, { "epoch": 0.49853490793732796, "grad_norm": 1.1035076379776, "learning_rate": 1.0035944201675243e-05, "loss": 3.029002571105957, "step": 61760 }, { "epoch": 0.498615629263095, "grad_norm": 0.7381746172904968, "learning_rate": 1.0034328731937031e-05, "loss": 2.483230400085449, "step": 61770 }, { "epoch": 0.4986963505888621, "grad_norm": 1.0054302215576172, "learning_rate": 1.0032713262198817e-05, "loss": 2.363674545288086, "step": 61780 }, { "epoch": 0.4987770719146291, "grad_norm": 1.0265754461288452, "learning_rate": 1.0031097792460605e-05, "loss": 2.9776777267456054, "step": 61790 }, { "epoch": 0.4988577932403962, "grad_norm": 0.7070863246917725, "learning_rate": 1.002948232272239e-05, "loss": 2.6881385803222657, "step": 61800 }, { "epoch": 0.4989385145661632, "grad_norm": 0.7950799465179443, "learning_rate": 1.0027866852984178e-05, "loss": 3.043497848510742, "step": 61810 }, { "epoch": 0.4990192358919303, "grad_norm": 0.8388614058494568, "learning_rate": 1.0026251383245964e-05, "loss": 2.472401809692383, "step": 61820 }, { "epoch": 0.49909995721769734, "grad_norm": 0.5593527555465698, "learning_rate": 1.0024635913507752e-05, "loss": 2.284526062011719, "step": 61830 }, { "epoch": 0.4991806785434644, "grad_norm": 0.618035614490509, "learning_rate": 1.0023020443769538e-05, "loss": 3.0058361053466798, "step": 61840 }, { "epoch": 0.49926139986923146, "grad_norm": 1.179423213005066, "learning_rate": 1.0021404974031326e-05, "loss": 3.2426937103271483, "step": 61850 }, { "epoch": 0.4993421211949985, "grad_norm": 0.8713903427124023, "learning_rate": 1.0019789504293112e-05, "loss": 2.727580261230469, "step": 61860 }, { "epoch": 0.4994228425207656, "grad_norm": 1.0280492305755615, "learning_rate": 1.00181740345549e-05, "loss": 3.54937858581543, "step": 61870 }, { "epoch": 0.4995035638465326, "grad_norm": 0.9101462364196777, "learning_rate": 1.0016558564816685e-05, "loss": 3.3896240234375, "step": 61880 }, { "epoch": 0.4995842851722997, "grad_norm": 0.7578888535499573, "learning_rate": 1.0014943095078473e-05, "loss": 3.0810791015625, "step": 61890 }, { "epoch": 0.4996650064980667, "grad_norm": 1.163509726524353, "learning_rate": 1.0013327625340259e-05, "loss": 2.9057233810424803, "step": 61900 }, { "epoch": 0.49974572782383375, "grad_norm": 0.8804201483726501, "learning_rate": 1.0011712155602047e-05, "loss": 2.6432422637939452, "step": 61910 }, { "epoch": 0.49982644914960084, "grad_norm": 0.9799640774726868, "learning_rate": 1.0010096685863833e-05, "loss": 3.1975440979003906, "step": 61920 }, { "epoch": 0.49990717047536787, "grad_norm": 0.7186727523803711, "learning_rate": 1.000848121612562e-05, "loss": 2.635445404052734, "step": 61930 }, { "epoch": 0.49998789180113495, "grad_norm": 0.6125782132148743, "learning_rate": 1.0006865746387407e-05, "loss": 2.969839859008789, "step": 61940 }, { "epoch": 0.500068613126902, "grad_norm": 0.8007249236106873, "learning_rate": 1.0005250276649194e-05, "loss": 2.7673929214477537, "step": 61950 }, { "epoch": 0.5001493344526691, "grad_norm": 1.4244099855422974, "learning_rate": 1.000363480691098e-05, "loss": 3.0734699249267576, "step": 61960 }, { "epoch": 0.5002300557784362, "grad_norm": 0.8634440302848816, "learning_rate": 1.0002019337172768e-05, "loss": 2.687112808227539, "step": 61970 }, { "epoch": 0.5003107771042031, "grad_norm": 0.9185909628868103, "learning_rate": 1.0000403867434554e-05, "loss": 2.841762351989746, "step": 61980 }, { "epoch": 0.5003914984299702, "grad_norm": 0.6069133877754211, "learning_rate": 9.99878839769634e-06, "loss": 2.80096321105957, "step": 61990 }, { "epoch": 0.5004722197557373, "grad_norm": 1.1176739931106567, "learning_rate": 9.997172927958128e-06, "loss": 2.84265079498291, "step": 62000 }, { "epoch": 0.5005529410815043, "grad_norm": 0.8568295240402222, "learning_rate": 9.995557458219914e-06, "loss": 2.9689682006835936, "step": 62010 }, { "epoch": 0.5006336624072714, "grad_norm": 1.3912180662155151, "learning_rate": 9.993941988481701e-06, "loss": 2.555594635009766, "step": 62020 }, { "epoch": 0.5007143837330384, "grad_norm": 0.7851918935775757, "learning_rate": 9.992326518743489e-06, "loss": 2.5289739608764648, "step": 62030 }, { "epoch": 0.5007951050588055, "grad_norm": 0.572867214679718, "learning_rate": 9.990711049005275e-06, "loss": 2.802618980407715, "step": 62040 }, { "epoch": 0.5008758263845725, "grad_norm": 0.9966775178909302, "learning_rate": 9.989095579267063e-06, "loss": 2.8321468353271486, "step": 62050 }, { "epoch": 0.5009565477103396, "grad_norm": 1.1192296743392944, "learning_rate": 9.987480109528849e-06, "loss": 3.4019004821777346, "step": 62060 }, { "epoch": 0.5010372690361067, "grad_norm": 0.711543083190918, "learning_rate": 9.985864639790636e-06, "loss": 2.9122234344482423, "step": 62070 }, { "epoch": 0.5011179903618737, "grad_norm": 0.8074836730957031, "learning_rate": 9.984249170052422e-06, "loss": 2.9537925720214844, "step": 62080 }, { "epoch": 0.5011987116876407, "grad_norm": 0.6894704699516296, "learning_rate": 9.98263370031421e-06, "loss": 2.5845064163208007, "step": 62090 }, { "epoch": 0.5012794330134078, "grad_norm": 1.1117912530899048, "learning_rate": 9.981018230575996e-06, "loss": 2.773669624328613, "step": 62100 }, { "epoch": 0.5013601543391749, "grad_norm": 0.7754833102226257, "learning_rate": 9.979402760837784e-06, "loss": 3.619629669189453, "step": 62110 }, { "epoch": 0.5014408756649419, "grad_norm": 0.840824544429779, "learning_rate": 9.97778729109957e-06, "loss": 3.0040563583374023, "step": 62120 }, { "epoch": 0.501521596990709, "grad_norm": 1.0286710262298584, "learning_rate": 9.976171821361357e-06, "loss": 2.7642932891845704, "step": 62130 }, { "epoch": 0.5016023183164761, "grad_norm": 0.9472775459289551, "learning_rate": 9.974556351623143e-06, "loss": 2.385907745361328, "step": 62140 }, { "epoch": 0.501683039642243, "grad_norm": 0.8177242279052734, "learning_rate": 9.972940881884931e-06, "loss": 2.646458053588867, "step": 62150 }, { "epoch": 0.5017637609680101, "grad_norm": 0.8188559412956238, "learning_rate": 9.971325412146717e-06, "loss": 2.771404838562012, "step": 62160 }, { "epoch": 0.5018444822937772, "grad_norm": 0.8354328274726868, "learning_rate": 9.969709942408505e-06, "loss": 3.1034080505371096, "step": 62170 }, { "epoch": 0.5019252036195443, "grad_norm": 1.1275099515914917, "learning_rate": 9.96809447267029e-06, "loss": 2.6939958572387694, "step": 62180 }, { "epoch": 0.5020059249453113, "grad_norm": 0.9858490824699402, "learning_rate": 9.966479002932079e-06, "loss": 2.579905700683594, "step": 62190 }, { "epoch": 0.5020866462710784, "grad_norm": 0.9403866529464722, "learning_rate": 9.964863533193865e-06, "loss": 2.426021385192871, "step": 62200 }, { "epoch": 0.5021673675968454, "grad_norm": 1.0485228300094604, "learning_rate": 9.963248063455652e-06, "loss": 2.4592971801757812, "step": 62210 }, { "epoch": 0.5022480889226124, "grad_norm": 0.7922062873840332, "learning_rate": 9.961632593717438e-06, "loss": 2.756414604187012, "step": 62220 }, { "epoch": 0.5023288102483795, "grad_norm": 1.20638906955719, "learning_rate": 9.960017123979226e-06, "loss": 2.984890174865723, "step": 62230 }, { "epoch": 0.5024095315741466, "grad_norm": 0.6925155520439148, "learning_rate": 9.958401654241012e-06, "loss": 2.5076486587524416, "step": 62240 }, { "epoch": 0.5024902528999137, "grad_norm": 0.8742615580558777, "learning_rate": 9.9567861845028e-06, "loss": 2.902241516113281, "step": 62250 }, { "epoch": 0.5025709742256806, "grad_norm": 0.9154860377311707, "learning_rate": 9.955170714764586e-06, "loss": 2.670025444030762, "step": 62260 }, { "epoch": 0.5026516955514477, "grad_norm": 0.6067345142364502, "learning_rate": 9.953555245026373e-06, "loss": 2.7574819564819335, "step": 62270 }, { "epoch": 0.5027324168772148, "grad_norm": 0.9750869274139404, "learning_rate": 9.95193977528816e-06, "loss": 3.1386507034301756, "step": 62280 }, { "epoch": 0.5028131382029818, "grad_norm": 1.113954782485962, "learning_rate": 9.950324305549947e-06, "loss": 3.207785797119141, "step": 62290 }, { "epoch": 0.5028938595287489, "grad_norm": 2.020688772201538, "learning_rate": 9.948708835811735e-06, "loss": 3.0010637283325194, "step": 62300 }, { "epoch": 0.502974580854516, "grad_norm": 0.7611095309257507, "learning_rate": 9.94709336607352e-06, "loss": 2.8241886138916015, "step": 62310 }, { "epoch": 0.503055302180283, "grad_norm": 0.8949041962623596, "learning_rate": 9.945477896335308e-06, "loss": 2.931599235534668, "step": 62320 }, { "epoch": 0.50313602350605, "grad_norm": 0.6646500825881958, "learning_rate": 9.943862426597094e-06, "loss": 2.517352485656738, "step": 62330 }, { "epoch": 0.5032167448318171, "grad_norm": 1.0997799634933472, "learning_rate": 9.942246956858882e-06, "loss": 2.3752260208129883, "step": 62340 }, { "epoch": 0.5032974661575842, "grad_norm": 0.9376562237739563, "learning_rate": 9.940631487120668e-06, "loss": 2.820648765563965, "step": 62350 }, { "epoch": 0.5033781874833513, "grad_norm": 1.5915462970733643, "learning_rate": 9.939016017382456e-06, "loss": 2.545524024963379, "step": 62360 }, { "epoch": 0.5034589088091183, "grad_norm": 1.0652830600738525, "learning_rate": 9.937400547644242e-06, "loss": 2.733673667907715, "step": 62370 }, { "epoch": 0.5035396301348853, "grad_norm": 1.0278040170669556, "learning_rate": 9.93578507790603e-06, "loss": 2.552743148803711, "step": 62380 }, { "epoch": 0.5036203514606524, "grad_norm": 0.943078875541687, "learning_rate": 9.934169608167817e-06, "loss": 2.7821849822998046, "step": 62390 }, { "epoch": 0.5037010727864194, "grad_norm": 1.6948782205581665, "learning_rate": 9.932554138429603e-06, "loss": 2.9133127212524412, "step": 62400 }, { "epoch": 0.5037817941121865, "grad_norm": 1.03878915309906, "learning_rate": 9.93093866869139e-06, "loss": 2.7368986129760744, "step": 62410 }, { "epoch": 0.5038625154379536, "grad_norm": 1.5677378177642822, "learning_rate": 9.929323198953177e-06, "loss": 2.957998847961426, "step": 62420 }, { "epoch": 0.5039432367637207, "grad_norm": 1.0720199346542358, "learning_rate": 9.927707729214964e-06, "loss": 2.7032495498657227, "step": 62430 }, { "epoch": 0.5040239580894876, "grad_norm": 1.0142844915390015, "learning_rate": 9.92609225947675e-06, "loss": 2.5928606033325194, "step": 62440 }, { "epoch": 0.5041046794152547, "grad_norm": 0.9906235933303833, "learning_rate": 9.924476789738538e-06, "loss": 3.040560722351074, "step": 62450 }, { "epoch": 0.5041854007410218, "grad_norm": 1.3021800518035889, "learning_rate": 9.922861320000324e-06, "loss": 3.15216064453125, "step": 62460 }, { "epoch": 0.5042661220667888, "grad_norm": 1.0890096426010132, "learning_rate": 9.921245850262112e-06, "loss": 2.6966569900512694, "step": 62470 }, { "epoch": 0.5043468433925559, "grad_norm": 0.8959314823150635, "learning_rate": 9.919630380523898e-06, "loss": 2.7005636215209963, "step": 62480 }, { "epoch": 0.504427564718323, "grad_norm": 0.985444188117981, "learning_rate": 9.918014910785686e-06, "loss": 2.3847043991088865, "step": 62490 }, { "epoch": 0.50450828604409, "grad_norm": 0.6512814164161682, "learning_rate": 9.916399441047472e-06, "loss": 2.9222843170166017, "step": 62500 }, { "epoch": 0.504589007369857, "grad_norm": 0.9132644534111023, "learning_rate": 9.91478397130926e-06, "loss": 2.5572978973388674, "step": 62510 }, { "epoch": 0.5046697286956241, "grad_norm": 1.4728447198867798, "learning_rate": 9.913168501571045e-06, "loss": 2.8475858688354494, "step": 62520 }, { "epoch": 0.5047504500213912, "grad_norm": 0.7087832689285278, "learning_rate": 9.911553031832833e-06, "loss": 2.4290639877319338, "step": 62530 }, { "epoch": 0.5048311713471582, "grad_norm": 1.9925508499145508, "learning_rate": 9.909937562094619e-06, "loss": 3.4009349822998045, "step": 62540 }, { "epoch": 0.5049118926729252, "grad_norm": 0.695501983165741, "learning_rate": 9.908322092356407e-06, "loss": 2.841302680969238, "step": 62550 }, { "epoch": 0.5049926139986923, "grad_norm": 0.6651877164840698, "learning_rate": 9.906706622618193e-06, "loss": 2.9775053024291993, "step": 62560 }, { "epoch": 0.5050733353244594, "grad_norm": 1.2469666004180908, "learning_rate": 9.90509115287998e-06, "loss": 2.9795604705810548, "step": 62570 }, { "epoch": 0.5051540566502264, "grad_norm": 0.6359099745750427, "learning_rate": 9.903475683141766e-06, "loss": 2.766880416870117, "step": 62580 }, { "epoch": 0.5052347779759935, "grad_norm": 1.0467517375946045, "learning_rate": 9.901860213403554e-06, "loss": 3.054052543640137, "step": 62590 }, { "epoch": 0.5053154993017606, "grad_norm": 1.955234169960022, "learning_rate": 9.90024474366534e-06, "loss": 2.7874011993408203, "step": 62600 }, { "epoch": 0.5053962206275275, "grad_norm": 1.018102765083313, "learning_rate": 9.898629273927128e-06, "loss": 2.876822090148926, "step": 62610 }, { "epoch": 0.5054769419532946, "grad_norm": 0.6983993053436279, "learning_rate": 9.897013804188914e-06, "loss": 2.8694345474243166, "step": 62620 }, { "epoch": 0.5055576632790617, "grad_norm": 1.0523823499679565, "learning_rate": 9.895398334450701e-06, "loss": 2.8043519973754885, "step": 62630 }, { "epoch": 0.5056383846048288, "grad_norm": 1.010887861251831, "learning_rate": 9.893782864712487e-06, "loss": 2.84150390625, "step": 62640 }, { "epoch": 0.5057191059305958, "grad_norm": 0.8890776038169861, "learning_rate": 9.892167394974275e-06, "loss": 2.921033477783203, "step": 62650 }, { "epoch": 0.5057998272563629, "grad_norm": 0.593903124332428, "learning_rate": 9.890551925236061e-06, "loss": 2.906652641296387, "step": 62660 }, { "epoch": 0.5058805485821299, "grad_norm": 1.3422675132751465, "learning_rate": 9.888936455497849e-06, "loss": 3.3420879364013674, "step": 62670 }, { "epoch": 0.5059612699078969, "grad_norm": 0.9637744426727295, "learning_rate": 9.887320985759635e-06, "loss": 2.852620315551758, "step": 62680 }, { "epoch": 0.506041991233664, "grad_norm": 1.1725378036499023, "learning_rate": 9.885705516021422e-06, "loss": 3.4985092163085936, "step": 62690 }, { "epoch": 0.5061227125594311, "grad_norm": 0.7762746214866638, "learning_rate": 9.884090046283208e-06, "loss": 3.034776306152344, "step": 62700 }, { "epoch": 0.5062034338851982, "grad_norm": 0.7357393503189087, "learning_rate": 9.882474576544996e-06, "loss": 2.5478212356567385, "step": 62710 }, { "epoch": 0.5062841552109651, "grad_norm": 0.7295218110084534, "learning_rate": 9.880859106806782e-06, "loss": 2.66312313079834, "step": 62720 }, { "epoch": 0.5063648765367322, "grad_norm": 0.9798949956893921, "learning_rate": 9.87924363706857e-06, "loss": 2.479270172119141, "step": 62730 }, { "epoch": 0.5064455978624993, "grad_norm": 0.8186240196228027, "learning_rate": 9.877628167330356e-06, "loss": 2.5978731155395507, "step": 62740 }, { "epoch": 0.5065263191882663, "grad_norm": 1.1917139291763306, "learning_rate": 9.876012697592144e-06, "loss": 2.752467918395996, "step": 62750 }, { "epoch": 0.5066070405140334, "grad_norm": 1.223244071006775, "learning_rate": 9.87439722785393e-06, "loss": 2.8683719635009766, "step": 62760 }, { "epoch": 0.5066877618398005, "grad_norm": 0.6035457253456116, "learning_rate": 9.872781758115717e-06, "loss": 2.9126667022705077, "step": 62770 }, { "epoch": 0.5067684831655676, "grad_norm": 1.314103126525879, "learning_rate": 9.871166288377503e-06, "loss": 3.119456672668457, "step": 62780 }, { "epoch": 0.5068492044913345, "grad_norm": 0.812274694442749, "learning_rate": 9.869550818639291e-06, "loss": 3.0080575942993164, "step": 62790 }, { "epoch": 0.5069299258171016, "grad_norm": 1.1807043552398682, "learning_rate": 9.867935348901077e-06, "loss": 2.4664649963378906, "step": 62800 }, { "epoch": 0.5070106471428687, "grad_norm": 1.258248209953308, "learning_rate": 9.866319879162865e-06, "loss": 2.5035558700561524, "step": 62810 }, { "epoch": 0.5070913684686358, "grad_norm": 0.9154230356216431, "learning_rate": 9.86470440942465e-06, "loss": 2.6497722625732423, "step": 62820 }, { "epoch": 0.5071720897944028, "grad_norm": 0.7739982604980469, "learning_rate": 9.863088939686438e-06, "loss": 2.7527151107788086, "step": 62830 }, { "epoch": 0.5072528111201698, "grad_norm": 1.0098094940185547, "learning_rate": 9.861473469948224e-06, "loss": 2.5261802673339844, "step": 62840 }, { "epoch": 0.5073335324459369, "grad_norm": 0.8236598372459412, "learning_rate": 9.859858000210012e-06, "loss": 2.3273523330688475, "step": 62850 }, { "epoch": 0.5074142537717039, "grad_norm": 1.3313629627227783, "learning_rate": 9.858242530471798e-06, "loss": 3.127116012573242, "step": 62860 }, { "epoch": 0.507494975097471, "grad_norm": 0.7484287023544312, "learning_rate": 9.856627060733586e-06, "loss": 2.8095359802246094, "step": 62870 }, { "epoch": 0.5075756964232381, "grad_norm": 1.2989946603775024, "learning_rate": 9.855011590995372e-06, "loss": 3.0838062286376955, "step": 62880 }, { "epoch": 0.5076564177490052, "grad_norm": 1.0781275033950806, "learning_rate": 9.85339612125716e-06, "loss": 2.583150100708008, "step": 62890 }, { "epoch": 0.5077371390747721, "grad_norm": 1.2922234535217285, "learning_rate": 9.851780651518947e-06, "loss": 3.221772003173828, "step": 62900 }, { "epoch": 0.5078178604005392, "grad_norm": 0.6091242432594299, "learning_rate": 9.850165181780733e-06, "loss": 3.4113994598388673, "step": 62910 }, { "epoch": 0.5078985817263063, "grad_norm": 0.749941349029541, "learning_rate": 9.84854971204252e-06, "loss": 2.699197769165039, "step": 62920 }, { "epoch": 0.5079793030520733, "grad_norm": 0.6952776312828064, "learning_rate": 9.846934242304307e-06, "loss": 2.563427543640137, "step": 62930 }, { "epoch": 0.5080600243778404, "grad_norm": 0.9778774380683899, "learning_rate": 9.845318772566094e-06, "loss": 2.6929042816162108, "step": 62940 }, { "epoch": 0.5081407457036075, "grad_norm": 1.3387607336044312, "learning_rate": 9.84370330282788e-06, "loss": 2.569280242919922, "step": 62950 }, { "epoch": 0.5082214670293745, "grad_norm": 0.9471925497055054, "learning_rate": 9.842087833089668e-06, "loss": 2.7424036026000977, "step": 62960 }, { "epoch": 0.5083021883551415, "grad_norm": 1.0753291845321655, "learning_rate": 9.840472363351454e-06, "loss": 2.767891502380371, "step": 62970 }, { "epoch": 0.5083829096809086, "grad_norm": 0.6664330959320068, "learning_rate": 9.838856893613242e-06, "loss": 2.555029296875, "step": 62980 }, { "epoch": 0.5084636310066757, "grad_norm": 1.2503939867019653, "learning_rate": 9.837241423875028e-06, "loss": 2.6277881622314454, "step": 62990 }, { "epoch": 0.5085443523324427, "grad_norm": 1.2896161079406738, "learning_rate": 9.835625954136816e-06, "loss": 2.8288505554199217, "step": 63000 }, { "epoch": 0.5086250736582097, "grad_norm": 0.7178511619567871, "learning_rate": 9.834010484398602e-06, "loss": 2.372758483886719, "step": 63010 }, { "epoch": 0.5087057949839768, "grad_norm": 1.238555669784546, "learning_rate": 9.83239501466039e-06, "loss": 2.266898345947266, "step": 63020 }, { "epoch": 0.5087865163097439, "grad_norm": 1.1378332376480103, "learning_rate": 9.830779544922175e-06, "loss": 2.7425451278686523, "step": 63030 }, { "epoch": 0.5088672376355109, "grad_norm": 0.778635561466217, "learning_rate": 9.829164075183963e-06, "loss": 2.8918298721313476, "step": 63040 }, { "epoch": 0.508947958961278, "grad_norm": 0.5393241047859192, "learning_rate": 9.827548605445749e-06, "loss": 2.445832443237305, "step": 63050 }, { "epoch": 0.5090286802870451, "grad_norm": 1.1587586402893066, "learning_rate": 9.825933135707537e-06, "loss": 2.6491390228271485, "step": 63060 }, { "epoch": 0.509109401612812, "grad_norm": 1.02911376953125, "learning_rate": 9.824317665969323e-06, "loss": 2.918614959716797, "step": 63070 }, { "epoch": 0.5091901229385791, "grad_norm": 1.3859189748764038, "learning_rate": 9.82270219623111e-06, "loss": 2.6811508178710937, "step": 63080 }, { "epoch": 0.5092708442643462, "grad_norm": 0.8460173010826111, "learning_rate": 9.821086726492896e-06, "loss": 3.187213134765625, "step": 63090 }, { "epoch": 0.5093515655901133, "grad_norm": 0.8863676190376282, "learning_rate": 9.819471256754684e-06, "loss": 2.4625892639160156, "step": 63100 }, { "epoch": 0.5094322869158803, "grad_norm": 0.9200359582901001, "learning_rate": 9.81785578701647e-06, "loss": 2.686476707458496, "step": 63110 }, { "epoch": 0.5095130082416474, "grad_norm": 0.772975742816925, "learning_rate": 9.816240317278258e-06, "loss": 3.182227897644043, "step": 63120 }, { "epoch": 0.5095937295674144, "grad_norm": 0.7832950949668884, "learning_rate": 9.814624847540044e-06, "loss": 2.581826591491699, "step": 63130 }, { "epoch": 0.5096744508931814, "grad_norm": 0.7890759706497192, "learning_rate": 9.813009377801831e-06, "loss": 3.0006614685058595, "step": 63140 }, { "epoch": 0.5097551722189485, "grad_norm": 1.1789897680282593, "learning_rate": 9.811393908063617e-06, "loss": 2.3308994293212892, "step": 63150 }, { "epoch": 0.5098358935447156, "grad_norm": 1.5759730339050293, "learning_rate": 9.809778438325405e-06, "loss": 3.0469944000244142, "step": 63160 }, { "epoch": 0.5099166148704827, "grad_norm": 1.1797966957092285, "learning_rate": 9.808162968587191e-06, "loss": 2.4349632263183594, "step": 63170 }, { "epoch": 0.5099973361962497, "grad_norm": 0.7338408827781677, "learning_rate": 9.806547498848979e-06, "loss": 2.7897377014160156, "step": 63180 }, { "epoch": 0.5100780575220167, "grad_norm": 1.1973210573196411, "learning_rate": 9.804932029110765e-06, "loss": 2.770579719543457, "step": 63190 }, { "epoch": 0.5101587788477838, "grad_norm": 0.6780831813812256, "learning_rate": 9.803316559372552e-06, "loss": 2.6145292282104493, "step": 63200 }, { "epoch": 0.5102395001735508, "grad_norm": 0.6331537961959839, "learning_rate": 9.801701089634338e-06, "loss": 2.556268310546875, "step": 63210 }, { "epoch": 0.5103202214993179, "grad_norm": 0.7659469246864319, "learning_rate": 9.800085619896126e-06, "loss": 2.8504510879516602, "step": 63220 }, { "epoch": 0.510400942825085, "grad_norm": 1.1827821731567383, "learning_rate": 9.798470150157912e-06, "loss": 2.768366050720215, "step": 63230 }, { "epoch": 0.510481664150852, "grad_norm": 1.0820821523666382, "learning_rate": 9.7968546804197e-06, "loss": 2.974858856201172, "step": 63240 }, { "epoch": 0.510562385476619, "grad_norm": 0.6613873839378357, "learning_rate": 9.795239210681486e-06, "loss": 3.0326654434204103, "step": 63250 }, { "epoch": 0.5106431068023861, "grad_norm": 0.917286217212677, "learning_rate": 9.793623740943274e-06, "loss": 3.027989387512207, "step": 63260 }, { "epoch": 0.5107238281281532, "grad_norm": 1.0184719562530518, "learning_rate": 9.79200827120506e-06, "loss": 3.0606414794921877, "step": 63270 }, { "epoch": 0.5108045494539202, "grad_norm": 1.280867338180542, "learning_rate": 9.790392801466847e-06, "loss": 2.950701141357422, "step": 63280 }, { "epoch": 0.5108852707796873, "grad_norm": 0.510859489440918, "learning_rate": 9.788777331728633e-06, "loss": 2.916955757141113, "step": 63290 }, { "epoch": 0.5109659921054543, "grad_norm": 0.9050807356834412, "learning_rate": 9.787161861990421e-06, "loss": 3.0812198638916017, "step": 63300 }, { "epoch": 0.5110467134312214, "grad_norm": 0.7006497383117676, "learning_rate": 9.785546392252207e-06, "loss": 2.828993797302246, "step": 63310 }, { "epoch": 0.5111274347569884, "grad_norm": 1.1851623058319092, "learning_rate": 9.783930922513995e-06, "loss": 2.771841621398926, "step": 63320 }, { "epoch": 0.5112081560827555, "grad_norm": 1.0678752660751343, "learning_rate": 9.78231545277578e-06, "loss": 2.451665687561035, "step": 63330 }, { "epoch": 0.5112888774085226, "grad_norm": 0.6550853252410889, "learning_rate": 9.780699983037568e-06, "loss": 2.789023017883301, "step": 63340 }, { "epoch": 0.5113695987342897, "grad_norm": 0.6176639199256897, "learning_rate": 9.779084513299354e-06, "loss": 2.8282587051391603, "step": 63350 }, { "epoch": 0.5114503200600566, "grad_norm": 1.1256499290466309, "learning_rate": 9.777469043561142e-06, "loss": 2.365408706665039, "step": 63360 }, { "epoch": 0.5115310413858237, "grad_norm": 0.9478667378425598, "learning_rate": 9.775853573822928e-06, "loss": 2.911732482910156, "step": 63370 }, { "epoch": 0.5116117627115908, "grad_norm": 1.1796858310699463, "learning_rate": 9.774238104084716e-06, "loss": 3.0008865356445313, "step": 63380 }, { "epoch": 0.5116924840373578, "grad_norm": 0.49278324842453003, "learning_rate": 9.772622634346502e-06, "loss": 2.785202407836914, "step": 63390 }, { "epoch": 0.5117732053631249, "grad_norm": 1.0319970846176147, "learning_rate": 9.77100716460829e-06, "loss": 2.621450996398926, "step": 63400 }, { "epoch": 0.511853926688892, "grad_norm": 0.9699057340621948, "learning_rate": 9.769391694870075e-06, "loss": 2.4325801849365236, "step": 63410 }, { "epoch": 0.511934648014659, "grad_norm": 0.6991472840309143, "learning_rate": 9.767776225131863e-06, "loss": 3.4255176544189454, "step": 63420 }, { "epoch": 0.512015369340426, "grad_norm": 1.9559422731399536, "learning_rate": 9.76616075539365e-06, "loss": 2.7181428909301757, "step": 63430 }, { "epoch": 0.5120960906661931, "grad_norm": 1.0894451141357422, "learning_rate": 9.764545285655437e-06, "loss": 3.1541229248046876, "step": 63440 }, { "epoch": 0.5121768119919602, "grad_norm": 0.6141522526741028, "learning_rate": 9.762929815917224e-06, "loss": 2.7822944641113283, "step": 63450 }, { "epoch": 0.5122575333177272, "grad_norm": 1.6828504800796509, "learning_rate": 9.76131434617901e-06, "loss": 2.413570213317871, "step": 63460 }, { "epoch": 0.5123382546434943, "grad_norm": 0.9132557511329651, "learning_rate": 9.759698876440798e-06, "loss": 2.567750358581543, "step": 63470 }, { "epoch": 0.5124189759692613, "grad_norm": 0.7225993275642395, "learning_rate": 9.758083406702584e-06, "loss": 2.3404361724853517, "step": 63480 }, { "epoch": 0.5124996972950284, "grad_norm": 1.8329707384109497, "learning_rate": 9.756467936964372e-06, "loss": 3.4662101745605467, "step": 63490 }, { "epoch": 0.5125804186207954, "grad_norm": 1.4911208152770996, "learning_rate": 9.754852467226158e-06, "loss": 2.7083263397216797, "step": 63500 }, { "epoch": 0.5126611399465625, "grad_norm": 1.109731674194336, "learning_rate": 9.753236997487946e-06, "loss": 2.766850471496582, "step": 63510 }, { "epoch": 0.5127418612723296, "grad_norm": 0.6549285054206848, "learning_rate": 9.751621527749732e-06, "loss": 2.585851860046387, "step": 63520 }, { "epoch": 0.5128225825980965, "grad_norm": 1.0578017234802246, "learning_rate": 9.75000605801152e-06, "loss": 2.3979034423828125, "step": 63530 }, { "epoch": 0.5129033039238636, "grad_norm": 0.9794686436653137, "learning_rate": 9.748390588273305e-06, "loss": 2.792698860168457, "step": 63540 }, { "epoch": 0.5129840252496307, "grad_norm": 0.9132699966430664, "learning_rate": 9.746775118535093e-06, "loss": 2.8702417373657227, "step": 63550 }, { "epoch": 0.5130647465753978, "grad_norm": 1.5515774488449097, "learning_rate": 9.74515964879688e-06, "loss": 3.2946750640869142, "step": 63560 }, { "epoch": 0.5131454679011648, "grad_norm": 1.115893006324768, "learning_rate": 9.743544179058667e-06, "loss": 2.7762731552124023, "step": 63570 }, { "epoch": 0.5132261892269319, "grad_norm": 0.5940079092979431, "learning_rate": 9.741928709320454e-06, "loss": 2.4300561904907227, "step": 63580 }, { "epoch": 0.513306910552699, "grad_norm": 0.9773955941200256, "learning_rate": 9.74031323958224e-06, "loss": 3.0535438537597654, "step": 63590 }, { "epoch": 0.5133876318784659, "grad_norm": 0.9423537254333496, "learning_rate": 9.738697769844028e-06, "loss": 2.665546417236328, "step": 63600 }, { "epoch": 0.513468353204233, "grad_norm": 1.2140125036239624, "learning_rate": 9.737082300105814e-06, "loss": 2.877104568481445, "step": 63610 }, { "epoch": 0.5135490745300001, "grad_norm": 1.4871834516525269, "learning_rate": 9.735466830367602e-06, "loss": 2.9671842575073244, "step": 63620 }, { "epoch": 0.5136297958557672, "grad_norm": 0.818813145160675, "learning_rate": 9.733851360629388e-06, "loss": 2.7775701522827148, "step": 63630 }, { "epoch": 0.5137105171815342, "grad_norm": 1.0793869495391846, "learning_rate": 9.732235890891175e-06, "loss": 2.8511377334594727, "step": 63640 }, { "epoch": 0.5137912385073012, "grad_norm": 0.7862015962600708, "learning_rate": 9.730620421152961e-06, "loss": 2.9183387756347656, "step": 63650 }, { "epoch": 0.5138719598330683, "grad_norm": 0.8615155220031738, "learning_rate": 9.729004951414749e-06, "loss": 2.44000244140625, "step": 63660 }, { "epoch": 0.5139526811588353, "grad_norm": 1.5448863506317139, "learning_rate": 9.727389481676535e-06, "loss": 3.018697166442871, "step": 63670 }, { "epoch": 0.5140334024846024, "grad_norm": 1.2364168167114258, "learning_rate": 9.725774011938323e-06, "loss": 2.733026695251465, "step": 63680 }, { "epoch": 0.5141141238103695, "grad_norm": 0.9453902840614319, "learning_rate": 9.724158542200109e-06, "loss": 2.8744367599487304, "step": 63690 }, { "epoch": 0.5141948451361366, "grad_norm": 0.7744700312614441, "learning_rate": 9.722543072461896e-06, "loss": 2.5299188613891603, "step": 63700 }, { "epoch": 0.5142755664619035, "grad_norm": 1.2590157985687256, "learning_rate": 9.720927602723682e-06, "loss": 3.3402324676513673, "step": 63710 }, { "epoch": 0.5143562877876706, "grad_norm": 0.6816662549972534, "learning_rate": 9.71931213298547e-06, "loss": 2.5542715072631834, "step": 63720 }, { "epoch": 0.5144370091134377, "grad_norm": 1.0346331596374512, "learning_rate": 9.717696663247256e-06, "loss": 3.023175811767578, "step": 63730 }, { "epoch": 0.5145177304392047, "grad_norm": 1.2478482723236084, "learning_rate": 9.716081193509044e-06, "loss": 2.883073616027832, "step": 63740 }, { "epoch": 0.5145984517649718, "grad_norm": 0.6987295150756836, "learning_rate": 9.71446572377083e-06, "loss": 2.6756013870239257, "step": 63750 }, { "epoch": 0.5146791730907389, "grad_norm": 1.3344833850860596, "learning_rate": 9.712850254032618e-06, "loss": 3.0096296310424804, "step": 63760 }, { "epoch": 0.5147598944165059, "grad_norm": 1.0191645622253418, "learning_rate": 9.711234784294405e-06, "loss": 3.449607086181641, "step": 63770 }, { "epoch": 0.5148406157422729, "grad_norm": 0.8965631723403931, "learning_rate": 9.709619314556191e-06, "loss": 2.835748291015625, "step": 63780 }, { "epoch": 0.51492133706804, "grad_norm": 0.9279645681381226, "learning_rate": 9.708003844817979e-06, "loss": 2.9173255920410157, "step": 63790 }, { "epoch": 0.5150020583938071, "grad_norm": 0.9679649472236633, "learning_rate": 9.706388375079765e-06, "loss": 2.6727373123168947, "step": 63800 }, { "epoch": 0.5150827797195742, "grad_norm": 0.7441786527633667, "learning_rate": 9.704772905341553e-06, "loss": 2.8616628646850586, "step": 63810 }, { "epoch": 0.5151635010453411, "grad_norm": 0.958591103553772, "learning_rate": 9.703157435603339e-06, "loss": 2.8828102111816407, "step": 63820 }, { "epoch": 0.5152442223711082, "grad_norm": 0.6305018067359924, "learning_rate": 9.701541965865126e-06, "loss": 2.8517738342285157, "step": 63830 }, { "epoch": 0.5153249436968753, "grad_norm": 0.8759307861328125, "learning_rate": 9.699926496126912e-06, "loss": 2.7566661834716797, "step": 63840 }, { "epoch": 0.5154056650226423, "grad_norm": 1.630861759185791, "learning_rate": 9.6983110263887e-06, "loss": 2.873583984375, "step": 63850 }, { "epoch": 0.5154863863484094, "grad_norm": 0.9754616618156433, "learning_rate": 9.696695556650486e-06, "loss": 2.797472381591797, "step": 63860 }, { "epoch": 0.5155671076741765, "grad_norm": 0.7271453142166138, "learning_rate": 9.695080086912274e-06, "loss": 2.8238094329833983, "step": 63870 }, { "epoch": 0.5156478289999435, "grad_norm": 0.9829028248786926, "learning_rate": 9.69346461717406e-06, "loss": 2.680799865722656, "step": 63880 }, { "epoch": 0.5157285503257105, "grad_norm": 0.8737095594406128, "learning_rate": 9.691849147435847e-06, "loss": 2.3817190170288085, "step": 63890 }, { "epoch": 0.5158092716514776, "grad_norm": 0.7150952816009521, "learning_rate": 9.690233677697633e-06, "loss": 2.565189552307129, "step": 63900 }, { "epoch": 0.5158899929772447, "grad_norm": 0.5862747430801392, "learning_rate": 9.688618207959421e-06, "loss": 2.9733375549316405, "step": 63910 }, { "epoch": 0.5159707143030117, "grad_norm": 1.084118366241455, "learning_rate": 9.687002738221207e-06, "loss": 3.1065555572509767, "step": 63920 }, { "epoch": 0.5160514356287788, "grad_norm": 0.8167080879211426, "learning_rate": 9.685387268482995e-06, "loss": 2.6885730743408205, "step": 63930 }, { "epoch": 0.5161321569545458, "grad_norm": 1.0761123895645142, "learning_rate": 9.68377179874478e-06, "loss": 2.8359766006469727, "step": 63940 }, { "epoch": 0.5162128782803129, "grad_norm": 0.8453913331031799, "learning_rate": 9.682156329006568e-06, "loss": 2.796213150024414, "step": 63950 }, { "epoch": 0.5162935996060799, "grad_norm": 0.9366353750228882, "learning_rate": 9.680540859268354e-06, "loss": 2.9610509872436523, "step": 63960 }, { "epoch": 0.516374320931847, "grad_norm": 0.8995727300643921, "learning_rate": 9.678925389530142e-06, "loss": 2.624774360656738, "step": 63970 }, { "epoch": 0.5164550422576141, "grad_norm": 1.0333627462387085, "learning_rate": 9.677309919791928e-06, "loss": 3.089485931396484, "step": 63980 }, { "epoch": 0.516535763583381, "grad_norm": 0.7994043827056885, "learning_rate": 9.675694450053716e-06, "loss": 2.800749397277832, "step": 63990 }, { "epoch": 0.5166164849091481, "grad_norm": 0.9083414077758789, "learning_rate": 9.674078980315502e-06, "loss": 2.991308403015137, "step": 64000 }, { "epoch": 0.5166972062349152, "grad_norm": 1.3777143955230713, "learning_rate": 9.67246351057729e-06, "loss": 2.4821823120117186, "step": 64010 }, { "epoch": 0.5167779275606823, "grad_norm": 0.7880132794380188, "learning_rate": 9.670848040839075e-06, "loss": 2.7159629821777345, "step": 64020 }, { "epoch": 0.5168586488864493, "grad_norm": 1.6929521560668945, "learning_rate": 9.669232571100863e-06, "loss": 3.2845989227294923, "step": 64030 }, { "epoch": 0.5169393702122164, "grad_norm": 1.0588326454162598, "learning_rate": 9.66761710136265e-06, "loss": 3.11787109375, "step": 64040 }, { "epoch": 0.5170200915379835, "grad_norm": 1.1019103527069092, "learning_rate": 9.666001631624437e-06, "loss": 2.843524932861328, "step": 64050 }, { "epoch": 0.5171008128637504, "grad_norm": 1.663564920425415, "learning_rate": 9.664386161886223e-06, "loss": 2.9604331970214846, "step": 64060 }, { "epoch": 0.5171815341895175, "grad_norm": 0.9122651219367981, "learning_rate": 9.66277069214801e-06, "loss": 2.4571624755859376, "step": 64070 }, { "epoch": 0.5172622555152846, "grad_norm": 0.6981228590011597, "learning_rate": 9.661155222409797e-06, "loss": 2.560942459106445, "step": 64080 }, { "epoch": 0.5173429768410517, "grad_norm": 0.7611421346664429, "learning_rate": 9.659539752671584e-06, "loss": 2.914346122741699, "step": 64090 }, { "epoch": 0.5174236981668187, "grad_norm": 0.7869390249252319, "learning_rate": 9.65792428293337e-06, "loss": 2.948895072937012, "step": 64100 }, { "epoch": 0.5175044194925857, "grad_norm": 1.4061896800994873, "learning_rate": 9.656308813195158e-06, "loss": 2.8295116424560547, "step": 64110 }, { "epoch": 0.5175851408183528, "grad_norm": 0.9459760785102844, "learning_rate": 9.654693343456944e-06, "loss": 2.771242141723633, "step": 64120 }, { "epoch": 0.5176658621441198, "grad_norm": 1.0697108507156372, "learning_rate": 9.653077873718732e-06, "loss": 2.875484657287598, "step": 64130 }, { "epoch": 0.5177465834698869, "grad_norm": 1.0549960136413574, "learning_rate": 9.651462403980518e-06, "loss": 2.8419145584106444, "step": 64140 }, { "epoch": 0.517827304795654, "grad_norm": 0.7063285708427429, "learning_rate": 9.649846934242305e-06, "loss": 2.7313024520874025, "step": 64150 }, { "epoch": 0.5179080261214211, "grad_norm": 0.744613528251648, "learning_rate": 9.648231464504091e-06, "loss": 2.8473388671875, "step": 64160 }, { "epoch": 0.517988747447188, "grad_norm": 1.0795025825500488, "learning_rate": 9.646615994765879e-06, "loss": 2.6070749282836916, "step": 64170 }, { "epoch": 0.5180694687729551, "grad_norm": 1.0541608333587646, "learning_rate": 9.645000525027665e-06, "loss": 2.463532257080078, "step": 64180 }, { "epoch": 0.5181501900987222, "grad_norm": 0.8968658447265625, "learning_rate": 9.643385055289453e-06, "loss": 2.555790328979492, "step": 64190 }, { "epoch": 0.5182309114244892, "grad_norm": 1.010322093963623, "learning_rate": 9.641769585551239e-06, "loss": 2.820961761474609, "step": 64200 }, { "epoch": 0.5183116327502563, "grad_norm": 1.020394206047058, "learning_rate": 9.640154115813026e-06, "loss": 2.8251535415649416, "step": 64210 }, { "epoch": 0.5183923540760234, "grad_norm": 0.7040609121322632, "learning_rate": 9.638538646074812e-06, "loss": 2.6099571228027343, "step": 64220 }, { "epoch": 0.5184730754017904, "grad_norm": 1.0845364332199097, "learning_rate": 9.6369231763366e-06, "loss": 2.6181480407714846, "step": 64230 }, { "epoch": 0.5185537967275574, "grad_norm": 0.9296708106994629, "learning_rate": 9.635307706598386e-06, "loss": 3.079971122741699, "step": 64240 }, { "epoch": 0.5186345180533245, "grad_norm": 1.2701492309570312, "learning_rate": 9.633692236860174e-06, "loss": 3.221649169921875, "step": 64250 }, { "epoch": 0.5187152393790916, "grad_norm": 1.2411911487579346, "learning_rate": 9.63207676712196e-06, "loss": 2.8525341033935545, "step": 64260 }, { "epoch": 0.5187959607048587, "grad_norm": 1.173764944076538, "learning_rate": 9.630461297383747e-06, "loss": 2.779567909240723, "step": 64270 }, { "epoch": 0.5188766820306256, "grad_norm": 1.0709620714187622, "learning_rate": 9.628845827645533e-06, "loss": 3.2365085601806642, "step": 64280 }, { "epoch": 0.5189574033563927, "grad_norm": 0.7575298547744751, "learning_rate": 9.627230357907321e-06, "loss": 2.865055274963379, "step": 64290 }, { "epoch": 0.5190381246821598, "grad_norm": 1.0122203826904297, "learning_rate": 9.625614888169109e-06, "loss": 2.432462692260742, "step": 64300 }, { "epoch": 0.5191188460079268, "grad_norm": 0.6858364939689636, "learning_rate": 9.623999418430895e-06, "loss": 2.606327247619629, "step": 64310 }, { "epoch": 0.5191995673336939, "grad_norm": 1.2203320264816284, "learning_rate": 9.622383948692683e-06, "loss": 2.6639341354370116, "step": 64320 }, { "epoch": 0.519280288659461, "grad_norm": 1.8952295780181885, "learning_rate": 9.620768478954469e-06, "loss": 2.7405948638916016, "step": 64330 }, { "epoch": 0.519361009985228, "grad_norm": 0.5795139670372009, "learning_rate": 9.619153009216256e-06, "loss": 2.5936519622802736, "step": 64340 }, { "epoch": 0.519441731310995, "grad_norm": 1.1113818883895874, "learning_rate": 9.617537539478042e-06, "loss": 2.631595802307129, "step": 64350 }, { "epoch": 0.5195224526367621, "grad_norm": 1.0694096088409424, "learning_rate": 9.61592206973983e-06, "loss": 3.230594253540039, "step": 64360 }, { "epoch": 0.5196031739625292, "grad_norm": 1.0817389488220215, "learning_rate": 9.614306600001616e-06, "loss": 2.948391914367676, "step": 64370 }, { "epoch": 0.5196838952882962, "grad_norm": 0.729038417339325, "learning_rate": 9.612691130263404e-06, "loss": 2.5945980072021486, "step": 64380 }, { "epoch": 0.5197646166140633, "grad_norm": 1.0443289279937744, "learning_rate": 9.61107566052519e-06, "loss": 2.7510133743286134, "step": 64390 }, { "epoch": 0.5198453379398303, "grad_norm": 1.114720344543457, "learning_rate": 9.609460190786977e-06, "loss": 2.799184036254883, "step": 64400 }, { "epoch": 0.5199260592655974, "grad_norm": 0.72383713722229, "learning_rate": 9.607844721048763e-06, "loss": 3.0841142654418947, "step": 64410 }, { "epoch": 0.5200067805913644, "grad_norm": 0.9576472640037537, "learning_rate": 9.606229251310551e-06, "loss": 3.09704475402832, "step": 64420 }, { "epoch": 0.5200875019171315, "grad_norm": 1.043474793434143, "learning_rate": 9.604613781572337e-06, "loss": 2.5993459701538084, "step": 64430 }, { "epoch": 0.5201682232428986, "grad_norm": 1.0458672046661377, "learning_rate": 9.602998311834125e-06, "loss": 3.259694290161133, "step": 64440 }, { "epoch": 0.5202489445686656, "grad_norm": 0.8626628518104553, "learning_rate": 9.60138284209591e-06, "loss": 3.0439109802246094, "step": 64450 }, { "epoch": 0.5203296658944326, "grad_norm": 0.8776865601539612, "learning_rate": 9.599767372357698e-06, "loss": 2.870458221435547, "step": 64460 }, { "epoch": 0.5204103872201997, "grad_norm": 1.2124896049499512, "learning_rate": 9.598151902619484e-06, "loss": 2.587351608276367, "step": 64470 }, { "epoch": 0.5204911085459668, "grad_norm": 0.7072463035583496, "learning_rate": 9.596536432881272e-06, "loss": 2.5188533782958986, "step": 64480 }, { "epoch": 0.5205718298717338, "grad_norm": 1.0021169185638428, "learning_rate": 9.594920963143058e-06, "loss": 2.633749580383301, "step": 64490 }, { "epoch": 0.5206525511975009, "grad_norm": 0.5558580756187439, "learning_rate": 9.593305493404846e-06, "loss": 2.8464496612548826, "step": 64500 }, { "epoch": 0.520733272523268, "grad_norm": 1.1196104288101196, "learning_rate": 9.591690023666632e-06, "loss": 3.2385311126708984, "step": 64510 }, { "epoch": 0.5208139938490349, "grad_norm": 0.7161329388618469, "learning_rate": 9.59007455392842e-06, "loss": 3.1754026412963867, "step": 64520 }, { "epoch": 0.520894715174802, "grad_norm": 0.6025381088256836, "learning_rate": 9.588459084190205e-06, "loss": 2.884919357299805, "step": 64530 }, { "epoch": 0.5209754365005691, "grad_norm": 1.0130741596221924, "learning_rate": 9.586843614451993e-06, "loss": 2.5830877304077147, "step": 64540 }, { "epoch": 0.5210561578263362, "grad_norm": 1.3968825340270996, "learning_rate": 9.58522814471378e-06, "loss": 2.9360475540161133, "step": 64550 }, { "epoch": 0.5211368791521032, "grad_norm": 1.0130923986434937, "learning_rate": 9.583612674975567e-06, "loss": 2.8530515670776366, "step": 64560 }, { "epoch": 0.5212176004778702, "grad_norm": 1.2950154542922974, "learning_rate": 9.581997205237353e-06, "loss": 2.707189178466797, "step": 64570 }, { "epoch": 0.5212983218036373, "grad_norm": 0.6832395195960999, "learning_rate": 9.58038173549914e-06, "loss": 2.644370269775391, "step": 64580 }, { "epoch": 0.5213790431294043, "grad_norm": 1.1108977794647217, "learning_rate": 9.578766265760927e-06, "loss": 2.7256502151489257, "step": 64590 }, { "epoch": 0.5214597644551714, "grad_norm": 1.0328619480133057, "learning_rate": 9.577150796022714e-06, "loss": 2.5434267044067385, "step": 64600 }, { "epoch": 0.5215404857809385, "grad_norm": 0.6764615774154663, "learning_rate": 9.5755353262845e-06, "loss": 2.906692123413086, "step": 64610 }, { "epoch": 0.5216212071067056, "grad_norm": 0.7810578942298889, "learning_rate": 9.573919856546288e-06, "loss": 2.6234758377075194, "step": 64620 }, { "epoch": 0.5217019284324725, "grad_norm": 1.0900391340255737, "learning_rate": 9.572304386808074e-06, "loss": 2.651998519897461, "step": 64630 }, { "epoch": 0.5217826497582396, "grad_norm": 0.8655108213424683, "learning_rate": 9.570688917069862e-06, "loss": 3.1259414672851564, "step": 64640 }, { "epoch": 0.5218633710840067, "grad_norm": 0.9916457533836365, "learning_rate": 9.569073447331648e-06, "loss": 2.8007665634155274, "step": 64650 }, { "epoch": 0.5219440924097737, "grad_norm": 1.0704927444458008, "learning_rate": 9.567457977593435e-06, "loss": 2.581192207336426, "step": 64660 }, { "epoch": 0.5220248137355408, "grad_norm": 1.2366955280303955, "learning_rate": 9.565842507855221e-06, "loss": 3.061376953125, "step": 64670 }, { "epoch": 0.5221055350613079, "grad_norm": 1.5963865518569946, "learning_rate": 9.564227038117009e-06, "loss": 3.1126071929931642, "step": 64680 }, { "epoch": 0.5221862563870749, "grad_norm": 1.3727304935455322, "learning_rate": 9.562611568378795e-06, "loss": 2.832012748718262, "step": 64690 }, { "epoch": 0.5222669777128419, "grad_norm": 1.0215470790863037, "learning_rate": 9.560996098640583e-06, "loss": 2.36993465423584, "step": 64700 }, { "epoch": 0.522347699038609, "grad_norm": 0.9263255000114441, "learning_rate": 9.559380628902369e-06, "loss": 2.8220252990722656, "step": 64710 }, { "epoch": 0.5224284203643761, "grad_norm": 1.4799422025680542, "learning_rate": 9.557765159164156e-06, "loss": 2.736778450012207, "step": 64720 }, { "epoch": 0.5225091416901431, "grad_norm": 1.1260603666305542, "learning_rate": 9.556149689425942e-06, "loss": 3.0799030303955077, "step": 64730 }, { "epoch": 0.5225898630159101, "grad_norm": 0.7852981090545654, "learning_rate": 9.55453421968773e-06, "loss": 2.9571775436401366, "step": 64740 }, { "epoch": 0.5226705843416772, "grad_norm": 1.3943195343017578, "learning_rate": 9.552918749949516e-06, "loss": 2.9165775299072267, "step": 64750 }, { "epoch": 0.5227513056674443, "grad_norm": 0.6982232928276062, "learning_rate": 9.551303280211304e-06, "loss": 2.832314300537109, "step": 64760 }, { "epoch": 0.5228320269932113, "grad_norm": 1.116369366645813, "learning_rate": 9.54968781047309e-06, "loss": 2.602357864379883, "step": 64770 }, { "epoch": 0.5229127483189784, "grad_norm": 1.1721618175506592, "learning_rate": 9.548072340734877e-06, "loss": 2.5482614517211912, "step": 64780 }, { "epoch": 0.5229934696447455, "grad_norm": 1.3609877824783325, "learning_rate": 9.546456870996663e-06, "loss": 2.885092544555664, "step": 64790 }, { "epoch": 0.5230741909705126, "grad_norm": 0.9074145555496216, "learning_rate": 9.544841401258451e-06, "loss": 2.6094118118286134, "step": 64800 }, { "epoch": 0.5231549122962795, "grad_norm": 1.131864070892334, "learning_rate": 9.543225931520239e-06, "loss": 2.634316062927246, "step": 64810 }, { "epoch": 0.5232356336220466, "grad_norm": 0.8979840874671936, "learning_rate": 9.541610461782027e-06, "loss": 2.7370412826538084, "step": 64820 }, { "epoch": 0.5233163549478137, "grad_norm": 1.3557597398757935, "learning_rate": 9.539994992043813e-06, "loss": 2.8321956634521483, "step": 64830 }, { "epoch": 0.5233970762735807, "grad_norm": 1.0613365173339844, "learning_rate": 9.5383795223056e-06, "loss": 3.1419597625732423, "step": 64840 }, { "epoch": 0.5234777975993478, "grad_norm": 1.0850268602371216, "learning_rate": 9.536764052567386e-06, "loss": 2.19708194732666, "step": 64850 }, { "epoch": 0.5235585189251148, "grad_norm": 2.045727014541626, "learning_rate": 9.535148582829174e-06, "loss": 2.9671194076538088, "step": 64860 }, { "epoch": 0.5236392402508819, "grad_norm": 0.6828126907348633, "learning_rate": 9.53353311309096e-06, "loss": 2.9613351821899414, "step": 64870 }, { "epoch": 0.5237199615766489, "grad_norm": 0.9574368596076965, "learning_rate": 9.531917643352748e-06, "loss": 2.7938987731933596, "step": 64880 }, { "epoch": 0.523800682902416, "grad_norm": 0.8409423828125, "learning_rate": 9.530302173614534e-06, "loss": 2.5655927658081055, "step": 64890 }, { "epoch": 0.5238814042281831, "grad_norm": 0.7336152195930481, "learning_rate": 9.528686703876321e-06, "loss": 3.0320072174072266, "step": 64900 }, { "epoch": 0.52396212555395, "grad_norm": 0.8331858515739441, "learning_rate": 9.527071234138107e-06, "loss": 2.552397918701172, "step": 64910 }, { "epoch": 0.5240428468797171, "grad_norm": 1.115618109703064, "learning_rate": 9.525455764399895e-06, "loss": 2.64011173248291, "step": 64920 }, { "epoch": 0.5241235682054842, "grad_norm": 1.104385495185852, "learning_rate": 9.523840294661681e-06, "loss": 2.7913705825805666, "step": 64930 }, { "epoch": 0.5242042895312513, "grad_norm": 1.08656644821167, "learning_rate": 9.522224824923469e-06, "loss": 2.633144760131836, "step": 64940 }, { "epoch": 0.5242850108570183, "grad_norm": 1.082595705986023, "learning_rate": 9.520609355185255e-06, "loss": 2.884308624267578, "step": 64950 }, { "epoch": 0.5243657321827854, "grad_norm": 0.7219843864440918, "learning_rate": 9.518993885447042e-06, "loss": 2.784539794921875, "step": 64960 }, { "epoch": 0.5244464535085525, "grad_norm": 0.7451971769332886, "learning_rate": 9.517378415708828e-06, "loss": 3.0482456207275392, "step": 64970 }, { "epoch": 0.5245271748343194, "grad_norm": 1.0048669576644897, "learning_rate": 9.515762945970616e-06, "loss": 2.417346954345703, "step": 64980 }, { "epoch": 0.5246078961600865, "grad_norm": 1.2746691703796387, "learning_rate": 9.514147476232402e-06, "loss": 2.605996513366699, "step": 64990 }, { "epoch": 0.5246886174858536, "grad_norm": 0.9801106452941895, "learning_rate": 9.51253200649419e-06, "loss": 3.197767639160156, "step": 65000 }, { "epoch": 0.5247693388116207, "grad_norm": 0.7165217995643616, "learning_rate": 9.510916536755976e-06, "loss": 2.8809345245361326, "step": 65010 }, { "epoch": 0.5248500601373877, "grad_norm": 0.9760799407958984, "learning_rate": 9.509301067017763e-06, "loss": 2.694009017944336, "step": 65020 }, { "epoch": 0.5249307814631547, "grad_norm": 0.7784865498542786, "learning_rate": 9.50768559727955e-06, "loss": 2.864436912536621, "step": 65030 }, { "epoch": 0.5250115027889218, "grad_norm": 0.9711369276046753, "learning_rate": 9.506070127541337e-06, "loss": 2.92224235534668, "step": 65040 }, { "epoch": 0.5250922241146888, "grad_norm": 1.0099583864212036, "learning_rate": 9.504454657803123e-06, "loss": 2.960546112060547, "step": 65050 }, { "epoch": 0.5251729454404559, "grad_norm": 0.6749166250228882, "learning_rate": 9.50283918806491e-06, "loss": 2.5942960739135743, "step": 65060 }, { "epoch": 0.525253666766223, "grad_norm": 0.884438693523407, "learning_rate": 9.501223718326697e-06, "loss": 2.6750802993774414, "step": 65070 }, { "epoch": 0.5253343880919901, "grad_norm": 0.916443407535553, "learning_rate": 9.499608248588485e-06, "loss": 2.5463876724243164, "step": 65080 }, { "epoch": 0.525415109417757, "grad_norm": 0.8066939115524292, "learning_rate": 9.49799277885027e-06, "loss": 2.6912065505981446, "step": 65090 }, { "epoch": 0.5254958307435241, "grad_norm": 0.7221829891204834, "learning_rate": 9.496377309112058e-06, "loss": 3.107767868041992, "step": 65100 }, { "epoch": 0.5255765520692912, "grad_norm": 0.8246440887451172, "learning_rate": 9.494761839373844e-06, "loss": 2.705929183959961, "step": 65110 }, { "epoch": 0.5256572733950582, "grad_norm": 0.7876575589179993, "learning_rate": 9.493146369635632e-06, "loss": 2.956707572937012, "step": 65120 }, { "epoch": 0.5257379947208253, "grad_norm": 0.9830672144889832, "learning_rate": 9.491530899897418e-06, "loss": 2.6293554306030273, "step": 65130 }, { "epoch": 0.5258187160465924, "grad_norm": 0.586866557598114, "learning_rate": 9.489915430159206e-06, "loss": 2.6820873260498046, "step": 65140 }, { "epoch": 0.5258994373723594, "grad_norm": 0.9138040542602539, "learning_rate": 9.488299960420992e-06, "loss": 2.743072509765625, "step": 65150 }, { "epoch": 0.5259801586981264, "grad_norm": 1.05842924118042, "learning_rate": 9.48668449068278e-06, "loss": 2.4889259338378906, "step": 65160 }, { "epoch": 0.5260608800238935, "grad_norm": 1.4331045150756836, "learning_rate": 9.485069020944567e-06, "loss": 2.5919736862182616, "step": 65170 }, { "epoch": 0.5261416013496606, "grad_norm": 1.258962631225586, "learning_rate": 9.483453551206353e-06, "loss": 2.5630142211914064, "step": 65180 }, { "epoch": 0.5262223226754276, "grad_norm": 1.3634941577911377, "learning_rate": 9.48183808146814e-06, "loss": 3.167497444152832, "step": 65190 }, { "epoch": 0.5263030440011947, "grad_norm": 0.6729845404624939, "learning_rate": 9.480222611729927e-06, "loss": 2.7743696212768554, "step": 65200 }, { "epoch": 0.5263837653269617, "grad_norm": 0.937279462814331, "learning_rate": 9.478607141991714e-06, "loss": 2.7316520690917967, "step": 65210 }, { "epoch": 0.5264644866527288, "grad_norm": 1.018130898475647, "learning_rate": 9.4769916722535e-06, "loss": 2.590126800537109, "step": 65220 }, { "epoch": 0.5265452079784958, "grad_norm": 0.8910723328590393, "learning_rate": 9.475376202515288e-06, "loss": 2.673160171508789, "step": 65230 }, { "epoch": 0.5266259293042629, "grad_norm": 0.9069771766662598, "learning_rate": 9.473760732777074e-06, "loss": 2.564958953857422, "step": 65240 }, { "epoch": 0.52670665063003, "grad_norm": 1.0630749464035034, "learning_rate": 9.472145263038862e-06, "loss": 2.567327117919922, "step": 65250 }, { "epoch": 0.5267873719557971, "grad_norm": 0.9377865791320801, "learning_rate": 9.470529793300648e-06, "loss": 2.2602890014648436, "step": 65260 }, { "epoch": 0.526868093281564, "grad_norm": 0.7621026635169983, "learning_rate": 9.468914323562435e-06, "loss": 2.3461957931518556, "step": 65270 }, { "epoch": 0.5269488146073311, "grad_norm": 1.345349907875061, "learning_rate": 9.467298853824221e-06, "loss": 2.868113899230957, "step": 65280 }, { "epoch": 0.5270295359330982, "grad_norm": 1.652503490447998, "learning_rate": 9.465683384086009e-06, "loss": 2.5709203720092773, "step": 65290 }, { "epoch": 0.5271102572588652, "grad_norm": 1.3822270631790161, "learning_rate": 9.464067914347795e-06, "loss": 2.6970714569091796, "step": 65300 }, { "epoch": 0.5271909785846323, "grad_norm": 0.8066661953926086, "learning_rate": 9.462452444609583e-06, "loss": 2.971063232421875, "step": 65310 }, { "epoch": 0.5272716999103993, "grad_norm": 1.1163170337677002, "learning_rate": 9.460836974871369e-06, "loss": 2.8514915466308595, "step": 65320 }, { "epoch": 0.5273524212361664, "grad_norm": 0.7864810824394226, "learning_rate": 9.459221505133157e-06, "loss": 2.6937273025512694, "step": 65330 }, { "epoch": 0.5274331425619334, "grad_norm": 1.0681390762329102, "learning_rate": 9.457606035394943e-06, "loss": 2.719151496887207, "step": 65340 }, { "epoch": 0.5275138638877005, "grad_norm": 0.9049519300460815, "learning_rate": 9.45599056565673e-06, "loss": 2.7625171661376955, "step": 65350 }, { "epoch": 0.5275945852134676, "grad_norm": 1.105280876159668, "learning_rate": 9.454375095918516e-06, "loss": 2.502730369567871, "step": 65360 }, { "epoch": 0.5276753065392346, "grad_norm": 1.2004936933517456, "learning_rate": 9.452759626180304e-06, "loss": 2.843863677978516, "step": 65370 }, { "epoch": 0.5277560278650016, "grad_norm": 0.944564163684845, "learning_rate": 9.45114415644209e-06, "loss": 3.0592573165893553, "step": 65380 }, { "epoch": 0.5278367491907687, "grad_norm": 0.8760553002357483, "learning_rate": 9.449528686703878e-06, "loss": 2.536672592163086, "step": 65390 }, { "epoch": 0.5279174705165358, "grad_norm": 1.1180622577667236, "learning_rate": 9.447913216965664e-06, "loss": 2.951339340209961, "step": 65400 }, { "epoch": 0.5279981918423028, "grad_norm": 1.7633556127548218, "learning_rate": 9.446297747227451e-06, "loss": 2.9997188568115236, "step": 65410 }, { "epoch": 0.5280789131680699, "grad_norm": 0.690890908241272, "learning_rate": 9.444682277489237e-06, "loss": 2.750284957885742, "step": 65420 }, { "epoch": 0.528159634493837, "grad_norm": 0.9137788414955139, "learning_rate": 9.443066807751025e-06, "loss": 2.6583860397338865, "step": 65430 }, { "epoch": 0.5282403558196039, "grad_norm": 0.8228587508201599, "learning_rate": 9.441451338012811e-06, "loss": 2.99218635559082, "step": 65440 }, { "epoch": 0.528321077145371, "grad_norm": 1.088708758354187, "learning_rate": 9.439835868274599e-06, "loss": 2.938623237609863, "step": 65450 }, { "epoch": 0.5284017984711381, "grad_norm": 0.7248108386993408, "learning_rate": 9.438220398536385e-06, "loss": 2.6857980728149413, "step": 65460 }, { "epoch": 0.5284825197969052, "grad_norm": 0.9567210078239441, "learning_rate": 9.436604928798172e-06, "loss": 2.769500732421875, "step": 65470 }, { "epoch": 0.5285632411226722, "grad_norm": 0.5851685404777527, "learning_rate": 9.434989459059958e-06, "loss": 2.5843669891357424, "step": 65480 }, { "epoch": 0.5286439624484393, "grad_norm": 0.7290937304496765, "learning_rate": 9.433373989321746e-06, "loss": 2.719894027709961, "step": 65490 }, { "epoch": 0.5287246837742063, "grad_norm": 0.7283287048339844, "learning_rate": 9.431758519583532e-06, "loss": 2.478264045715332, "step": 65500 }, { "epoch": 0.5288054050999733, "grad_norm": 1.303138256072998, "learning_rate": 9.43014304984532e-06, "loss": 2.737500762939453, "step": 65510 }, { "epoch": 0.5288861264257404, "grad_norm": 0.9940485954284668, "learning_rate": 9.428527580107106e-06, "loss": 2.905268096923828, "step": 65520 }, { "epoch": 0.5289668477515075, "grad_norm": 1.3440579175949097, "learning_rate": 9.426912110368893e-06, "loss": 3.1509353637695314, "step": 65530 }, { "epoch": 0.5290475690772746, "grad_norm": 1.0651296377182007, "learning_rate": 9.42529664063068e-06, "loss": 3.1963903427124025, "step": 65540 }, { "epoch": 0.5291282904030415, "grad_norm": 0.7689242362976074, "learning_rate": 9.423681170892467e-06, "loss": 2.796291160583496, "step": 65550 }, { "epoch": 0.5292090117288086, "grad_norm": 0.5603083968162537, "learning_rate": 9.422065701154253e-06, "loss": 2.594018745422363, "step": 65560 }, { "epoch": 0.5292897330545757, "grad_norm": 0.9986757636070251, "learning_rate": 9.42045023141604e-06, "loss": 2.770059585571289, "step": 65570 }, { "epoch": 0.5293704543803427, "grad_norm": 0.6838526129722595, "learning_rate": 9.418834761677827e-06, "loss": 2.9004499435424806, "step": 65580 }, { "epoch": 0.5294511757061098, "grad_norm": 1.210911750793457, "learning_rate": 9.417219291939615e-06, "loss": 3.2907642364501952, "step": 65590 }, { "epoch": 0.5295318970318769, "grad_norm": 1.0518990755081177, "learning_rate": 9.4156038222014e-06, "loss": 2.8489742279052734, "step": 65600 }, { "epoch": 0.529612618357644, "grad_norm": 0.7293334007263184, "learning_rate": 9.413988352463188e-06, "loss": 2.6737831115722654, "step": 65610 }, { "epoch": 0.5296933396834109, "grad_norm": 1.5709924697875977, "learning_rate": 9.412372882724974e-06, "loss": 2.9521995544433595, "step": 65620 }, { "epoch": 0.529774061009178, "grad_norm": 1.3957940340042114, "learning_rate": 9.410757412986762e-06, "loss": 3.055232048034668, "step": 65630 }, { "epoch": 0.5298547823349451, "grad_norm": 0.7418118715286255, "learning_rate": 9.409141943248548e-06, "loss": 2.792597007751465, "step": 65640 }, { "epoch": 0.5299355036607121, "grad_norm": 0.8807697892189026, "learning_rate": 9.407526473510336e-06, "loss": 2.709752082824707, "step": 65650 }, { "epoch": 0.5300162249864792, "grad_norm": 1.0771557092666626, "learning_rate": 9.405911003772122e-06, "loss": 2.7332742691040037, "step": 65660 }, { "epoch": 0.5300969463122462, "grad_norm": 0.943317174911499, "learning_rate": 9.40429553403391e-06, "loss": 3.0331295013427733, "step": 65670 }, { "epoch": 0.5301776676380133, "grad_norm": 0.9210772514343262, "learning_rate": 9.402680064295697e-06, "loss": 2.9892133712768554, "step": 65680 }, { "epoch": 0.5302583889637803, "grad_norm": 1.187584638595581, "learning_rate": 9.401064594557483e-06, "loss": 2.928269958496094, "step": 65690 }, { "epoch": 0.5303391102895474, "grad_norm": 0.9239681959152222, "learning_rate": 9.39944912481927e-06, "loss": 2.9057071685791014, "step": 65700 }, { "epoch": 0.5304198316153145, "grad_norm": 1.3282465934753418, "learning_rate": 9.397833655081057e-06, "loss": 2.5894474029541015, "step": 65710 }, { "epoch": 0.5305005529410814, "grad_norm": 0.5698654651641846, "learning_rate": 9.396218185342844e-06, "loss": 3.331751251220703, "step": 65720 }, { "epoch": 0.5305812742668485, "grad_norm": 0.9549896717071533, "learning_rate": 9.39460271560463e-06, "loss": 2.8503040313720702, "step": 65730 }, { "epoch": 0.5306619955926156, "grad_norm": 0.8751822710037231, "learning_rate": 9.392987245866418e-06, "loss": 2.647481346130371, "step": 65740 }, { "epoch": 0.5307427169183827, "grad_norm": 0.967232882976532, "learning_rate": 9.391371776128204e-06, "loss": 2.6413848876953123, "step": 65750 }, { "epoch": 0.5308234382441497, "grad_norm": 1.0436310768127441, "learning_rate": 9.389756306389992e-06, "loss": 2.7341030120849608, "step": 65760 }, { "epoch": 0.5309041595699168, "grad_norm": 1.506332516670227, "learning_rate": 9.388140836651778e-06, "loss": 3.1479944229125976, "step": 65770 }, { "epoch": 0.5309848808956839, "grad_norm": 0.852088212966919, "learning_rate": 9.386525366913565e-06, "loss": 2.656194496154785, "step": 65780 }, { "epoch": 0.5310656022214509, "grad_norm": 0.5907131433486938, "learning_rate": 9.384909897175351e-06, "loss": 2.8068216323852537, "step": 65790 }, { "epoch": 0.5311463235472179, "grad_norm": 1.5354877710342407, "learning_rate": 9.383294427437139e-06, "loss": 2.9004743576049803, "step": 65800 }, { "epoch": 0.531227044872985, "grad_norm": 0.7686875462532043, "learning_rate": 9.381678957698925e-06, "loss": 2.7602970123291017, "step": 65810 }, { "epoch": 0.5313077661987521, "grad_norm": 1.0416239500045776, "learning_rate": 9.380063487960713e-06, "loss": 3.0210657119750977, "step": 65820 }, { "epoch": 0.5313884875245191, "grad_norm": 1.1136410236358643, "learning_rate": 9.378448018222499e-06, "loss": 2.774811935424805, "step": 65830 }, { "epoch": 0.5314692088502861, "grad_norm": 0.6971298456192017, "learning_rate": 9.376832548484286e-06, "loss": 2.7506858825683596, "step": 65840 }, { "epoch": 0.5315499301760532, "grad_norm": 0.7021262645721436, "learning_rate": 9.375217078746072e-06, "loss": 3.1609859466552734, "step": 65850 }, { "epoch": 0.5316306515018203, "grad_norm": 0.7631459832191467, "learning_rate": 9.37360160900786e-06, "loss": 2.788211441040039, "step": 65860 }, { "epoch": 0.5317113728275873, "grad_norm": 1.1656081676483154, "learning_rate": 9.371986139269646e-06, "loss": 2.3815950393676757, "step": 65870 }, { "epoch": 0.5317920941533544, "grad_norm": 1.1503804922103882, "learning_rate": 9.370370669531434e-06, "loss": 2.594738578796387, "step": 65880 }, { "epoch": 0.5318728154791215, "grad_norm": 0.6394961476325989, "learning_rate": 9.36875519979322e-06, "loss": 3.4120845794677734, "step": 65890 }, { "epoch": 0.5319535368048884, "grad_norm": 0.6796252727508545, "learning_rate": 9.367139730055008e-06, "loss": 2.6844913482666017, "step": 65900 }, { "epoch": 0.5320342581306555, "grad_norm": 1.0152838230133057, "learning_rate": 9.365524260316794e-06, "loss": 2.6335012435913088, "step": 65910 }, { "epoch": 0.5321149794564226, "grad_norm": 1.1184327602386475, "learning_rate": 9.363908790578581e-06, "loss": 2.5644977569580076, "step": 65920 }, { "epoch": 0.5321957007821897, "grad_norm": 1.1710054874420166, "learning_rate": 9.362293320840367e-06, "loss": 2.6796375274658204, "step": 65930 }, { "epoch": 0.5322764221079567, "grad_norm": 0.8249503374099731, "learning_rate": 9.360677851102155e-06, "loss": 2.9443424224853514, "step": 65940 }, { "epoch": 0.5323571434337238, "grad_norm": 0.5879439115524292, "learning_rate": 9.359062381363941e-06, "loss": 2.5960247039794924, "step": 65950 }, { "epoch": 0.5324378647594908, "grad_norm": 0.6099218726158142, "learning_rate": 9.357446911625729e-06, "loss": 2.9579601287841797, "step": 65960 }, { "epoch": 0.5325185860852578, "grad_norm": 1.3119480609893799, "learning_rate": 9.355831441887515e-06, "loss": 2.5965065002441405, "step": 65970 }, { "epoch": 0.5325993074110249, "grad_norm": 0.790392279624939, "learning_rate": 9.354215972149302e-06, "loss": 2.672810363769531, "step": 65980 }, { "epoch": 0.532680028736792, "grad_norm": 1.1508326530456543, "learning_rate": 9.352600502411088e-06, "loss": 2.7968063354492188, "step": 65990 }, { "epoch": 0.5327607500625591, "grad_norm": 1.1830778121948242, "learning_rate": 9.350985032672876e-06, "loss": 2.5951564788818358, "step": 66000 }, { "epoch": 0.532841471388326, "grad_norm": 0.723846435546875, "learning_rate": 9.349369562934662e-06, "loss": 3.36201171875, "step": 66010 }, { "epoch": 0.5329221927140931, "grad_norm": 0.7535169124603271, "learning_rate": 9.34775409319645e-06, "loss": 2.4787084579467775, "step": 66020 }, { "epoch": 0.5330029140398602, "grad_norm": 0.6774984002113342, "learning_rate": 9.346138623458236e-06, "loss": 2.7822980880737305, "step": 66030 }, { "epoch": 0.5330836353656272, "grad_norm": 0.9593552350997925, "learning_rate": 9.344523153720023e-06, "loss": 2.7402029037475586, "step": 66040 }, { "epoch": 0.5331643566913943, "grad_norm": 0.9286560416221619, "learning_rate": 9.34290768398181e-06, "loss": 3.2395862579345702, "step": 66050 }, { "epoch": 0.5332450780171614, "grad_norm": 2.989792585372925, "learning_rate": 9.341292214243599e-06, "loss": 3.0531259536743165, "step": 66060 }, { "epoch": 0.5333257993429285, "grad_norm": 1.462902307510376, "learning_rate": 9.339676744505385e-06, "loss": 3.049867057800293, "step": 66070 }, { "epoch": 0.5334065206686954, "grad_norm": 0.7914530038833618, "learning_rate": 9.338061274767172e-06, "loss": 2.9893062591552733, "step": 66080 }, { "epoch": 0.5334872419944625, "grad_norm": 1.4775164127349854, "learning_rate": 9.336445805028958e-06, "loss": 2.689421844482422, "step": 66090 }, { "epoch": 0.5335679633202296, "grad_norm": 1.4425982236862183, "learning_rate": 9.334830335290746e-06, "loss": 2.3786420822143555, "step": 66100 }, { "epoch": 0.5336486846459966, "grad_norm": 0.7443529963493347, "learning_rate": 9.333214865552532e-06, "loss": 3.185619354248047, "step": 66110 }, { "epoch": 0.5337294059717637, "grad_norm": 0.770043134689331, "learning_rate": 9.33159939581432e-06, "loss": 2.930497741699219, "step": 66120 }, { "epoch": 0.5338101272975307, "grad_norm": 1.528737187385559, "learning_rate": 9.329983926076106e-06, "loss": 3.6482780456542967, "step": 66130 }, { "epoch": 0.5338908486232978, "grad_norm": 0.6479588150978088, "learning_rate": 9.328368456337894e-06, "loss": 2.58026180267334, "step": 66140 }, { "epoch": 0.5339715699490648, "grad_norm": 0.881510317325592, "learning_rate": 9.32675298659968e-06, "loss": 2.670924758911133, "step": 66150 }, { "epoch": 0.5340522912748319, "grad_norm": 0.633698582649231, "learning_rate": 9.325137516861467e-06, "loss": 2.80657844543457, "step": 66160 }, { "epoch": 0.534133012600599, "grad_norm": 0.9011062383651733, "learning_rate": 9.323522047123253e-06, "loss": 2.5309011459350588, "step": 66170 }, { "epoch": 0.534213733926366, "grad_norm": 0.972507655620575, "learning_rate": 9.321906577385041e-06, "loss": 2.6427783966064453, "step": 66180 }, { "epoch": 0.534294455252133, "grad_norm": 1.1856441497802734, "learning_rate": 9.320291107646827e-06, "loss": 2.5716094970703125, "step": 66190 }, { "epoch": 0.5343751765779001, "grad_norm": 0.6869521737098694, "learning_rate": 9.318675637908615e-06, "loss": 2.6094194412231446, "step": 66200 }, { "epoch": 0.5344558979036672, "grad_norm": 0.7056351900100708, "learning_rate": 9.3170601681704e-06, "loss": 2.8201507568359374, "step": 66210 }, { "epoch": 0.5345366192294342, "grad_norm": 0.5980985760688782, "learning_rate": 9.315444698432188e-06, "loss": 2.752850341796875, "step": 66220 }, { "epoch": 0.5346173405552013, "grad_norm": 0.7236535549163818, "learning_rate": 9.313829228693974e-06, "loss": 2.745351219177246, "step": 66230 }, { "epoch": 0.5346980618809684, "grad_norm": 0.8528858423233032, "learning_rate": 9.312213758955762e-06, "loss": 2.3130075454711916, "step": 66240 }, { "epoch": 0.5347787832067354, "grad_norm": 1.8777779340744019, "learning_rate": 9.310598289217548e-06, "loss": 3.0159082412719727, "step": 66250 }, { "epoch": 0.5348595045325024, "grad_norm": 0.8346825838088989, "learning_rate": 9.308982819479336e-06, "loss": 2.8432649612426757, "step": 66260 }, { "epoch": 0.5349402258582695, "grad_norm": 1.1823574304580688, "learning_rate": 9.307367349741122e-06, "loss": 2.489723968505859, "step": 66270 }, { "epoch": 0.5350209471840366, "grad_norm": 1.2734936475753784, "learning_rate": 9.30575188000291e-06, "loss": 2.9840721130371093, "step": 66280 }, { "epoch": 0.5351016685098036, "grad_norm": 0.657930850982666, "learning_rate": 9.304136410264695e-06, "loss": 2.78857479095459, "step": 66290 }, { "epoch": 0.5351823898355706, "grad_norm": 0.9570196270942688, "learning_rate": 9.302520940526483e-06, "loss": 2.8933862686157226, "step": 66300 }, { "epoch": 0.5352631111613377, "grad_norm": 1.685032606124878, "learning_rate": 9.300905470788269e-06, "loss": 2.6658708572387697, "step": 66310 }, { "epoch": 0.5353438324871048, "grad_norm": 0.8135156035423279, "learning_rate": 9.299290001050057e-06, "loss": 2.7154077529907226, "step": 66320 }, { "epoch": 0.5354245538128718, "grad_norm": 1.0360900163650513, "learning_rate": 9.297674531311843e-06, "loss": 2.7485994338989257, "step": 66330 }, { "epoch": 0.5355052751386389, "grad_norm": 0.8331523537635803, "learning_rate": 9.29605906157363e-06, "loss": 2.3902099609375, "step": 66340 }, { "epoch": 0.535585996464406, "grad_norm": 1.4912844896316528, "learning_rate": 9.294443591835416e-06, "loss": 2.7969615936279295, "step": 66350 }, { "epoch": 0.5356667177901729, "grad_norm": 1.306708812713623, "learning_rate": 9.292828122097204e-06, "loss": 3.37176399230957, "step": 66360 }, { "epoch": 0.53574743911594, "grad_norm": 1.5178321599960327, "learning_rate": 9.29121265235899e-06, "loss": 3.3459026336669924, "step": 66370 }, { "epoch": 0.5358281604417071, "grad_norm": 0.7720452547073364, "learning_rate": 9.289597182620778e-06, "loss": 2.753012466430664, "step": 66380 }, { "epoch": 0.5359088817674742, "grad_norm": 1.0067111253738403, "learning_rate": 9.287981712882564e-06, "loss": 2.6152963638305664, "step": 66390 }, { "epoch": 0.5359896030932412, "grad_norm": 1.2312837839126587, "learning_rate": 9.286366243144352e-06, "loss": 2.4334716796875, "step": 66400 }, { "epoch": 0.5360703244190083, "grad_norm": 0.6125293970108032, "learning_rate": 9.284750773406138e-06, "loss": 2.5569982528686523, "step": 66410 }, { "epoch": 0.5361510457447753, "grad_norm": 0.9741295576095581, "learning_rate": 9.283135303667925e-06, "loss": 2.602364349365234, "step": 66420 }, { "epoch": 0.5362317670705423, "grad_norm": 1.2807464599609375, "learning_rate": 9.281519833929711e-06, "loss": 2.4354305267333984, "step": 66430 }, { "epoch": 0.5363124883963094, "grad_norm": 0.8727061152458191, "learning_rate": 9.279904364191499e-06, "loss": 2.8224287033081055, "step": 66440 }, { "epoch": 0.5363932097220765, "grad_norm": 0.9071477651596069, "learning_rate": 9.278288894453285e-06, "loss": 2.8557561874389648, "step": 66450 }, { "epoch": 0.5364739310478436, "grad_norm": 0.8588623404502869, "learning_rate": 9.276673424715073e-06, "loss": 2.9092817306518555, "step": 66460 }, { "epoch": 0.5365546523736106, "grad_norm": 0.8277011513710022, "learning_rate": 9.275057954976859e-06, "loss": 2.591069793701172, "step": 66470 }, { "epoch": 0.5366353736993776, "grad_norm": 0.5133955478668213, "learning_rate": 9.273442485238646e-06, "loss": 2.6160406112670898, "step": 66480 }, { "epoch": 0.5367160950251447, "grad_norm": 1.1003775596618652, "learning_rate": 9.271827015500432e-06, "loss": 3.3375431060791017, "step": 66490 }, { "epoch": 0.5367968163509117, "grad_norm": 0.9516619443893433, "learning_rate": 9.27021154576222e-06, "loss": 2.3260942459106446, "step": 66500 }, { "epoch": 0.5368775376766788, "grad_norm": 0.6397386193275452, "learning_rate": 9.268596076024006e-06, "loss": 2.5860795974731445, "step": 66510 }, { "epoch": 0.5369582590024459, "grad_norm": 0.8910925984382629, "learning_rate": 9.266980606285794e-06, "loss": 2.6757204055786135, "step": 66520 }, { "epoch": 0.537038980328213, "grad_norm": 0.8300216197967529, "learning_rate": 9.26536513654758e-06, "loss": 2.58972110748291, "step": 66530 }, { "epoch": 0.5371197016539799, "grad_norm": 1.3818711042404175, "learning_rate": 9.263749666809367e-06, "loss": 2.432084655761719, "step": 66540 }, { "epoch": 0.537200422979747, "grad_norm": 1.1201103925704956, "learning_rate": 9.262134197071155e-06, "loss": 2.3886289596557617, "step": 66550 }, { "epoch": 0.5372811443055141, "grad_norm": 1.0926814079284668, "learning_rate": 9.260518727332941e-06, "loss": 2.8628255844116213, "step": 66560 }, { "epoch": 0.5373618656312811, "grad_norm": 0.7364045977592468, "learning_rate": 9.258903257594729e-06, "loss": 2.901620101928711, "step": 66570 }, { "epoch": 0.5374425869570482, "grad_norm": 0.8874029517173767, "learning_rate": 9.257287787856515e-06, "loss": 2.7138654708862306, "step": 66580 }, { "epoch": 0.5375233082828152, "grad_norm": 0.9776442646980286, "learning_rate": 9.255672318118302e-06, "loss": 3.031577301025391, "step": 66590 }, { "epoch": 0.5376040296085823, "grad_norm": 1.1234538555145264, "learning_rate": 9.254056848380088e-06, "loss": 2.49621524810791, "step": 66600 }, { "epoch": 0.5376847509343493, "grad_norm": 0.6250044703483582, "learning_rate": 9.252441378641876e-06, "loss": 2.554145622253418, "step": 66610 }, { "epoch": 0.5377654722601164, "grad_norm": 0.7169772386550903, "learning_rate": 9.250825908903662e-06, "loss": 2.665683555603027, "step": 66620 }, { "epoch": 0.5378461935858835, "grad_norm": 0.9478522539138794, "learning_rate": 9.24921043916545e-06, "loss": 3.0286046981811525, "step": 66630 }, { "epoch": 0.5379269149116505, "grad_norm": 1.1357430219650269, "learning_rate": 9.247594969427236e-06, "loss": 2.6346403121948243, "step": 66640 }, { "epoch": 0.5380076362374175, "grad_norm": 1.2825353145599365, "learning_rate": 9.245979499689024e-06, "loss": 2.4737361907958983, "step": 66650 }, { "epoch": 0.5380883575631846, "grad_norm": 1.082336187362671, "learning_rate": 9.24436402995081e-06, "loss": 2.509202003479004, "step": 66660 }, { "epoch": 0.5381690788889517, "grad_norm": 1.4869152307510376, "learning_rate": 9.242748560212597e-06, "loss": 2.657658004760742, "step": 66670 }, { "epoch": 0.5382498002147187, "grad_norm": 0.83308345079422, "learning_rate": 9.241133090474383e-06, "loss": 2.5352651596069338, "step": 66680 }, { "epoch": 0.5383305215404858, "grad_norm": 1.4812840223312378, "learning_rate": 9.239517620736171e-06, "loss": 2.8901952743530273, "step": 66690 }, { "epoch": 0.5384112428662529, "grad_norm": 0.8714845180511475, "learning_rate": 9.237902150997957e-06, "loss": 2.8594837188720703, "step": 66700 }, { "epoch": 0.53849196419202, "grad_norm": 0.7831894159317017, "learning_rate": 9.236286681259745e-06, "loss": 3.016472053527832, "step": 66710 }, { "epoch": 0.5385726855177869, "grad_norm": 0.7653547525405884, "learning_rate": 9.23467121152153e-06, "loss": 2.58685359954834, "step": 66720 }, { "epoch": 0.538653406843554, "grad_norm": 1.2122764587402344, "learning_rate": 9.233055741783318e-06, "loss": 2.6899206161499025, "step": 66730 }, { "epoch": 0.5387341281693211, "grad_norm": 0.9752094745635986, "learning_rate": 9.231440272045104e-06, "loss": 3.018997573852539, "step": 66740 }, { "epoch": 0.5388148494950881, "grad_norm": 0.781869113445282, "learning_rate": 9.229824802306892e-06, "loss": 2.851007080078125, "step": 66750 }, { "epoch": 0.5388955708208552, "grad_norm": 1.2040252685546875, "learning_rate": 9.228209332568678e-06, "loss": 2.8413028717041016, "step": 66760 }, { "epoch": 0.5389762921466222, "grad_norm": 0.7623783946037292, "learning_rate": 9.226593862830466e-06, "loss": 2.5244461059570313, "step": 66770 }, { "epoch": 0.5390570134723893, "grad_norm": 0.876397430896759, "learning_rate": 9.224978393092252e-06, "loss": 2.6950878143310546, "step": 66780 }, { "epoch": 0.5391377347981563, "grad_norm": 0.8943349123001099, "learning_rate": 9.22336292335404e-06, "loss": 2.5926326751708983, "step": 66790 }, { "epoch": 0.5392184561239234, "grad_norm": 0.9013175368309021, "learning_rate": 9.221747453615825e-06, "loss": 2.7947818756103517, "step": 66800 }, { "epoch": 0.5392991774496905, "grad_norm": 0.8966972231864929, "learning_rate": 9.220131983877613e-06, "loss": 2.6187665939331053, "step": 66810 }, { "epoch": 0.5393798987754574, "grad_norm": 0.6814980506896973, "learning_rate": 9.218516514139399e-06, "loss": 2.609779167175293, "step": 66820 }, { "epoch": 0.5394606201012245, "grad_norm": 1.0948481559753418, "learning_rate": 9.216901044401187e-06, "loss": 2.346756935119629, "step": 66830 }, { "epoch": 0.5395413414269916, "grad_norm": 1.0472509860992432, "learning_rate": 9.215285574662973e-06, "loss": 2.6707557678222655, "step": 66840 }, { "epoch": 0.5396220627527587, "grad_norm": 0.7482317090034485, "learning_rate": 9.21367010492476e-06, "loss": 2.5030866622924806, "step": 66850 }, { "epoch": 0.5397027840785257, "grad_norm": 1.2092877626419067, "learning_rate": 9.212054635186546e-06, "loss": 2.23837833404541, "step": 66860 }, { "epoch": 0.5397835054042928, "grad_norm": 1.157626748085022, "learning_rate": 9.210439165448334e-06, "loss": 2.916807746887207, "step": 66870 }, { "epoch": 0.5398642267300598, "grad_norm": 1.5258640050888062, "learning_rate": 9.20882369571012e-06, "loss": 3.152965545654297, "step": 66880 }, { "epoch": 0.5399449480558268, "grad_norm": 0.7012800574302673, "learning_rate": 9.207208225971908e-06, "loss": 2.576932907104492, "step": 66890 }, { "epoch": 0.5400256693815939, "grad_norm": 0.8892056941986084, "learning_rate": 9.205592756233694e-06, "loss": 2.6157758712768553, "step": 66900 }, { "epoch": 0.540106390707361, "grad_norm": 0.900619626045227, "learning_rate": 9.203977286495482e-06, "loss": 2.446853256225586, "step": 66910 }, { "epoch": 0.5401871120331281, "grad_norm": 0.9584051966667175, "learning_rate": 9.202361816757268e-06, "loss": 2.741512107849121, "step": 66920 }, { "epoch": 0.540267833358895, "grad_norm": 0.9260470271110535, "learning_rate": 9.200746347019055e-06, "loss": 2.899355888366699, "step": 66930 }, { "epoch": 0.5403485546846621, "grad_norm": 1.064615249633789, "learning_rate": 9.199130877280841e-06, "loss": 2.4106311798095703, "step": 66940 }, { "epoch": 0.5404292760104292, "grad_norm": 2.2057809829711914, "learning_rate": 9.197515407542629e-06, "loss": 2.4431150436401365, "step": 66950 }, { "epoch": 0.5405099973361962, "grad_norm": 1.3176122903823853, "learning_rate": 9.195899937804415e-06, "loss": 2.439723587036133, "step": 66960 }, { "epoch": 0.5405907186619633, "grad_norm": 1.4546586275100708, "learning_rate": 9.194284468066203e-06, "loss": 2.9383190155029295, "step": 66970 }, { "epoch": 0.5406714399877304, "grad_norm": 0.9956502914428711, "learning_rate": 9.192668998327989e-06, "loss": 2.9232330322265625, "step": 66980 }, { "epoch": 0.5407521613134975, "grad_norm": 0.991291344165802, "learning_rate": 9.191053528589776e-06, "loss": 2.91060733795166, "step": 66990 }, { "epoch": 0.5408328826392644, "grad_norm": 0.5353359580039978, "learning_rate": 9.189438058851562e-06, "loss": 3.0645151138305664, "step": 67000 }, { "epoch": 0.5409136039650315, "grad_norm": 0.7572982311248779, "learning_rate": 9.18782258911335e-06, "loss": 3.1160783767700195, "step": 67010 }, { "epoch": 0.5409943252907986, "grad_norm": 1.4768750667572021, "learning_rate": 9.186207119375136e-06, "loss": 2.649012565612793, "step": 67020 }, { "epoch": 0.5410750466165656, "grad_norm": 1.2156972885131836, "learning_rate": 9.184591649636924e-06, "loss": 2.854851722717285, "step": 67030 }, { "epoch": 0.5411557679423327, "grad_norm": 1.4800187349319458, "learning_rate": 9.18297617989871e-06, "loss": 2.7452125549316406, "step": 67040 }, { "epoch": 0.5412364892680998, "grad_norm": 1.220479130744934, "learning_rate": 9.181360710160497e-06, "loss": 2.618784713745117, "step": 67050 }, { "epoch": 0.5413172105938668, "grad_norm": 1.543470859527588, "learning_rate": 9.179745240422283e-06, "loss": 3.02921199798584, "step": 67060 }, { "epoch": 0.5413979319196338, "grad_norm": 1.5534205436706543, "learning_rate": 9.178129770684071e-06, "loss": 2.781233024597168, "step": 67070 }, { "epoch": 0.5414786532454009, "grad_norm": 1.319061279296875, "learning_rate": 9.176514300945859e-06, "loss": 3.2270553588867186, "step": 67080 }, { "epoch": 0.541559374571168, "grad_norm": 0.8494532108306885, "learning_rate": 9.174898831207645e-06, "loss": 2.738298034667969, "step": 67090 }, { "epoch": 0.541640095896935, "grad_norm": 0.7491436004638672, "learning_rate": 9.173283361469432e-06, "loss": 2.812118148803711, "step": 67100 }, { "epoch": 0.541720817222702, "grad_norm": 0.8729871511459351, "learning_rate": 9.171667891731218e-06, "loss": 2.5544219970703126, "step": 67110 }, { "epoch": 0.5418015385484691, "grad_norm": 0.7615089416503906, "learning_rate": 9.170052421993006e-06, "loss": 2.573879051208496, "step": 67120 }, { "epoch": 0.5418822598742362, "grad_norm": 0.668912410736084, "learning_rate": 9.168436952254792e-06, "loss": 2.5987903594970705, "step": 67130 }, { "epoch": 0.5419629812000032, "grad_norm": 0.9186383485794067, "learning_rate": 9.16682148251658e-06, "loss": 3.0261474609375, "step": 67140 }, { "epoch": 0.5420437025257703, "grad_norm": 0.9233211874961853, "learning_rate": 9.165206012778366e-06, "loss": 2.6606225967407227, "step": 67150 }, { "epoch": 0.5421244238515374, "grad_norm": 0.7851197123527527, "learning_rate": 9.163590543040154e-06, "loss": 2.510959434509277, "step": 67160 }, { "epoch": 0.5422051451773043, "grad_norm": 0.9527661204338074, "learning_rate": 9.16197507330194e-06, "loss": 2.5788854598999023, "step": 67170 }, { "epoch": 0.5422858665030714, "grad_norm": 0.8638319969177246, "learning_rate": 9.160359603563727e-06, "loss": 3.0018383026123048, "step": 67180 }, { "epoch": 0.5423665878288385, "grad_norm": 1.041693091392517, "learning_rate": 9.158744133825513e-06, "loss": 3.1862565994262697, "step": 67190 }, { "epoch": 0.5424473091546056, "grad_norm": 0.7658764123916626, "learning_rate": 9.157128664087301e-06, "loss": 2.882832908630371, "step": 67200 }, { "epoch": 0.5425280304803726, "grad_norm": 1.4164212942123413, "learning_rate": 9.155513194349087e-06, "loss": 2.8893835067749025, "step": 67210 }, { "epoch": 0.5426087518061397, "grad_norm": 0.6762641072273254, "learning_rate": 9.153897724610875e-06, "loss": 2.9626012802124024, "step": 67220 }, { "epoch": 0.5426894731319067, "grad_norm": 1.269137978553772, "learning_rate": 9.15228225487266e-06, "loss": 3.157781219482422, "step": 67230 }, { "epoch": 0.5427701944576738, "grad_norm": 0.8099255561828613, "learning_rate": 9.150666785134448e-06, "loss": 2.424734115600586, "step": 67240 }, { "epoch": 0.5428509157834408, "grad_norm": 0.7594401240348816, "learning_rate": 9.149051315396234e-06, "loss": 3.3570518493652344, "step": 67250 }, { "epoch": 0.5429316371092079, "grad_norm": 1.6822465658187866, "learning_rate": 9.147435845658022e-06, "loss": 2.7886701583862306, "step": 67260 }, { "epoch": 0.543012358434975, "grad_norm": 0.7695360779762268, "learning_rate": 9.145820375919808e-06, "loss": 2.65220947265625, "step": 67270 }, { "epoch": 0.543093079760742, "grad_norm": 1.1134369373321533, "learning_rate": 9.144204906181596e-06, "loss": 3.1850202560424803, "step": 67280 }, { "epoch": 0.543173801086509, "grad_norm": 0.906829297542572, "learning_rate": 9.142589436443382e-06, "loss": 2.571891021728516, "step": 67290 }, { "epoch": 0.5432545224122761, "grad_norm": 0.8324800133705139, "learning_rate": 9.14097396670517e-06, "loss": 2.68919677734375, "step": 67300 }, { "epoch": 0.5433352437380432, "grad_norm": 1.353918194770813, "learning_rate": 9.139358496966955e-06, "loss": 3.065423774719238, "step": 67310 }, { "epoch": 0.5434159650638102, "grad_norm": 0.765235424041748, "learning_rate": 9.137743027228743e-06, "loss": 2.951021194458008, "step": 67320 }, { "epoch": 0.5434966863895773, "grad_norm": 1.0268409252166748, "learning_rate": 9.13612755749053e-06, "loss": 2.598089027404785, "step": 67330 }, { "epoch": 0.5435774077153444, "grad_norm": 1.566101312637329, "learning_rate": 9.134512087752317e-06, "loss": 2.696379852294922, "step": 67340 }, { "epoch": 0.5436581290411113, "grad_norm": 1.2902494668960571, "learning_rate": 9.132896618014104e-06, "loss": 2.6964338302612303, "step": 67350 }, { "epoch": 0.5437388503668784, "grad_norm": 0.682284951210022, "learning_rate": 9.13128114827589e-06, "loss": 3.047985649108887, "step": 67360 }, { "epoch": 0.5438195716926455, "grad_norm": 0.7986302971839905, "learning_rate": 9.129665678537678e-06, "loss": 2.8797582626342773, "step": 67370 }, { "epoch": 0.5439002930184126, "grad_norm": 0.9474505186080933, "learning_rate": 9.128050208799464e-06, "loss": 2.7860895156860352, "step": 67380 }, { "epoch": 0.5439810143441796, "grad_norm": 0.7546238303184509, "learning_rate": 9.126434739061252e-06, "loss": 2.457058906555176, "step": 67390 }, { "epoch": 0.5440617356699466, "grad_norm": 0.7764739990234375, "learning_rate": 9.124819269323038e-06, "loss": 2.7697628021240233, "step": 67400 }, { "epoch": 0.5441424569957137, "grad_norm": 0.892667293548584, "learning_rate": 9.123203799584826e-06, "loss": 2.483019256591797, "step": 67410 }, { "epoch": 0.5442231783214807, "grad_norm": 1.0416953563690186, "learning_rate": 9.121588329846613e-06, "loss": 2.4616744995117186, "step": 67420 }, { "epoch": 0.5443038996472478, "grad_norm": 0.7651287317276001, "learning_rate": 9.1199728601084e-06, "loss": 2.760784721374512, "step": 67430 }, { "epoch": 0.5443846209730149, "grad_norm": 1.0050643682479858, "learning_rate": 9.118357390370187e-06, "loss": 2.627708625793457, "step": 67440 }, { "epoch": 0.544465342298782, "grad_norm": 0.5880855917930603, "learning_rate": 9.116741920631973e-06, "loss": 3.137208366394043, "step": 67450 }, { "epoch": 0.5445460636245489, "grad_norm": 0.9852937459945679, "learning_rate": 9.11512645089376e-06, "loss": 2.4869945526123045, "step": 67460 }, { "epoch": 0.544626784950316, "grad_norm": 0.5159394145011902, "learning_rate": 9.113510981155547e-06, "loss": 2.6055831909179688, "step": 67470 }, { "epoch": 0.5447075062760831, "grad_norm": 1.2091344594955444, "learning_rate": 9.111895511417334e-06, "loss": 2.6277517318725585, "step": 67480 }, { "epoch": 0.5447882276018501, "grad_norm": 2.745814561843872, "learning_rate": 9.11028004167912e-06, "loss": 3.289897918701172, "step": 67490 }, { "epoch": 0.5448689489276172, "grad_norm": 1.2543185949325562, "learning_rate": 9.108664571940908e-06, "loss": 2.9034046173095702, "step": 67500 }, { "epoch": 0.5449496702533843, "grad_norm": 1.0742366313934326, "learning_rate": 9.107049102202694e-06, "loss": 2.47442512512207, "step": 67510 }, { "epoch": 0.5450303915791513, "grad_norm": 0.9365745186805725, "learning_rate": 9.105433632464482e-06, "loss": 3.0578237533569337, "step": 67520 }, { "epoch": 0.5451111129049183, "grad_norm": 1.3278915882110596, "learning_rate": 9.103818162726268e-06, "loss": 2.8469463348388673, "step": 67530 }, { "epoch": 0.5451918342306854, "grad_norm": 1.258772850036621, "learning_rate": 9.102202692988055e-06, "loss": 2.408036994934082, "step": 67540 }, { "epoch": 0.5452725555564525, "grad_norm": 0.7442977428436279, "learning_rate": 9.100587223249841e-06, "loss": 2.6423912048339844, "step": 67550 }, { "epoch": 0.5453532768822195, "grad_norm": 0.7758113145828247, "learning_rate": 9.098971753511629e-06, "loss": 3.1857763290405274, "step": 67560 }, { "epoch": 0.5454339982079865, "grad_norm": 0.48259642720222473, "learning_rate": 9.097356283773415e-06, "loss": 2.355965423583984, "step": 67570 }, { "epoch": 0.5455147195337536, "grad_norm": 0.8741592764854431, "learning_rate": 9.095740814035203e-06, "loss": 2.4755325317382812, "step": 67580 }, { "epoch": 0.5455954408595207, "grad_norm": 1.3896938562393188, "learning_rate": 9.094125344296989e-06, "loss": 2.9519960403442385, "step": 67590 }, { "epoch": 0.5456761621852877, "grad_norm": 0.8675661087036133, "learning_rate": 9.092509874558776e-06, "loss": 2.6998361587524413, "step": 67600 }, { "epoch": 0.5457568835110548, "grad_norm": 1.0261765718460083, "learning_rate": 9.090894404820562e-06, "loss": 3.1090316772460938, "step": 67610 }, { "epoch": 0.5458376048368219, "grad_norm": 0.9868694543838501, "learning_rate": 9.08927893508235e-06, "loss": 2.5105846405029295, "step": 67620 }, { "epoch": 0.5459183261625888, "grad_norm": 0.6793646812438965, "learning_rate": 9.087663465344136e-06, "loss": 2.814780426025391, "step": 67630 }, { "epoch": 0.5459990474883559, "grad_norm": 1.0530710220336914, "learning_rate": 9.086047995605924e-06, "loss": 2.774419975280762, "step": 67640 }, { "epoch": 0.546079768814123, "grad_norm": 1.499511480331421, "learning_rate": 9.08443252586771e-06, "loss": 2.974717140197754, "step": 67650 }, { "epoch": 0.5461604901398901, "grad_norm": 0.9599462151527405, "learning_rate": 9.082817056129497e-06, "loss": 2.726487731933594, "step": 67660 }, { "epoch": 0.5462412114656571, "grad_norm": 0.9428451061248779, "learning_rate": 9.081201586391283e-06, "loss": 2.368486022949219, "step": 67670 }, { "epoch": 0.5463219327914242, "grad_norm": 0.9246233701705933, "learning_rate": 9.079586116653071e-06, "loss": 3.190955924987793, "step": 67680 }, { "epoch": 0.5464026541171912, "grad_norm": 0.9863436818122864, "learning_rate": 9.077970646914857e-06, "loss": 2.7750492095947266, "step": 67690 }, { "epoch": 0.5464833754429583, "grad_norm": 0.734026312828064, "learning_rate": 9.076355177176645e-06, "loss": 2.955708122253418, "step": 67700 }, { "epoch": 0.5465640967687253, "grad_norm": 1.0735907554626465, "learning_rate": 9.074739707438431e-06, "loss": 2.331903839111328, "step": 67710 }, { "epoch": 0.5466448180944924, "grad_norm": 0.889685869216919, "learning_rate": 9.073124237700219e-06, "loss": 2.8720935821533202, "step": 67720 }, { "epoch": 0.5467255394202595, "grad_norm": 1.3122708797454834, "learning_rate": 9.071508767962005e-06, "loss": 2.932171630859375, "step": 67730 }, { "epoch": 0.5468062607460265, "grad_norm": 1.074718952178955, "learning_rate": 9.069893298223792e-06, "loss": 3.1865985870361326, "step": 67740 }, { "epoch": 0.5468869820717935, "grad_norm": 1.410150408744812, "learning_rate": 9.068277828485578e-06, "loss": 2.666496467590332, "step": 67750 }, { "epoch": 0.5469677033975606, "grad_norm": 1.1457619667053223, "learning_rate": 9.066662358747366e-06, "loss": 3.0825796127319336, "step": 67760 }, { "epoch": 0.5470484247233277, "grad_norm": 0.7764567732810974, "learning_rate": 9.065046889009152e-06, "loss": 2.5439725875854493, "step": 67770 }, { "epoch": 0.5471291460490947, "grad_norm": 1.7360202074050903, "learning_rate": 9.06343141927094e-06, "loss": 3.406917190551758, "step": 67780 }, { "epoch": 0.5472098673748618, "grad_norm": 0.6355568766593933, "learning_rate": 9.061815949532726e-06, "loss": 2.939764976501465, "step": 67790 }, { "epoch": 0.5472905887006289, "grad_norm": 1.0133637189865112, "learning_rate": 9.060200479794513e-06, "loss": 2.675044631958008, "step": 67800 }, { "epoch": 0.5473713100263958, "grad_norm": 0.6749737858772278, "learning_rate": 9.0585850100563e-06, "loss": 2.781239128112793, "step": 67810 }, { "epoch": 0.5474520313521629, "grad_norm": 0.8358235955238342, "learning_rate": 9.056969540318087e-06, "loss": 3.2742496490478517, "step": 67820 }, { "epoch": 0.54753275267793, "grad_norm": 0.7136818170547485, "learning_rate": 9.055354070579873e-06, "loss": 2.3814523696899412, "step": 67830 }, { "epoch": 0.5476134740036971, "grad_norm": 1.3192352056503296, "learning_rate": 9.05373860084166e-06, "loss": 2.858720397949219, "step": 67840 }, { "epoch": 0.5476941953294641, "grad_norm": 1.0486379861831665, "learning_rate": 9.052123131103447e-06, "loss": 2.864742469787598, "step": 67850 }, { "epoch": 0.5477749166552311, "grad_norm": 0.7726181745529175, "learning_rate": 9.050507661365234e-06, "loss": 2.7194246292114257, "step": 67860 }, { "epoch": 0.5478556379809982, "grad_norm": 1.6295058727264404, "learning_rate": 9.04889219162702e-06, "loss": 2.7466157913208007, "step": 67870 }, { "epoch": 0.5479363593067652, "grad_norm": 0.47440388798713684, "learning_rate": 9.047276721888808e-06, "loss": 2.505438804626465, "step": 67880 }, { "epoch": 0.5480170806325323, "grad_norm": 0.9276584386825562, "learning_rate": 9.045661252150594e-06, "loss": 2.6785478591918945, "step": 67890 }, { "epoch": 0.5480978019582994, "grad_norm": 0.9071744680404663, "learning_rate": 9.044045782412382e-06, "loss": 2.607977867126465, "step": 67900 }, { "epoch": 0.5481785232840665, "grad_norm": 0.9950529336929321, "learning_rate": 9.042430312674168e-06, "loss": 2.4602407455444335, "step": 67910 }, { "epoch": 0.5482592446098334, "grad_norm": 0.7595342397689819, "learning_rate": 9.040814842935955e-06, "loss": 2.9479230880737304, "step": 67920 }, { "epoch": 0.5483399659356005, "grad_norm": 0.7856020331382751, "learning_rate": 9.039199373197741e-06, "loss": 3.0720882415771484, "step": 67930 }, { "epoch": 0.5484206872613676, "grad_norm": 0.9958124160766602, "learning_rate": 9.03758390345953e-06, "loss": 2.393564796447754, "step": 67940 }, { "epoch": 0.5485014085871346, "grad_norm": 0.9598442912101746, "learning_rate": 9.035968433721317e-06, "loss": 2.4617593765258787, "step": 67950 }, { "epoch": 0.5485821299129017, "grad_norm": 0.6407003998756409, "learning_rate": 9.034352963983103e-06, "loss": 2.6666263580322265, "step": 67960 }, { "epoch": 0.5486628512386688, "grad_norm": 1.1806526184082031, "learning_rate": 9.03273749424489e-06, "loss": 2.6367124557495116, "step": 67970 }, { "epoch": 0.5487435725644358, "grad_norm": 0.9801158308982849, "learning_rate": 9.031122024506677e-06, "loss": 2.799906539916992, "step": 67980 }, { "epoch": 0.5488242938902028, "grad_norm": 0.7023945450782776, "learning_rate": 9.029506554768464e-06, "loss": 2.9188283920288085, "step": 67990 }, { "epoch": 0.5489050152159699, "grad_norm": 0.7574989199638367, "learning_rate": 9.02789108503025e-06, "loss": 2.8401105880737303, "step": 68000 }, { "epoch": 0.548985736541737, "grad_norm": 0.536821186542511, "learning_rate": 9.026275615292038e-06, "loss": 2.535263252258301, "step": 68010 }, { "epoch": 0.549066457867504, "grad_norm": 0.7067452669143677, "learning_rate": 9.024660145553824e-06, "loss": 3.1629152297973633, "step": 68020 }, { "epoch": 0.549147179193271, "grad_norm": 0.8238305449485779, "learning_rate": 9.023044675815612e-06, "loss": 2.6814523696899415, "step": 68030 }, { "epoch": 0.5492279005190381, "grad_norm": 0.8722226619720459, "learning_rate": 9.021429206077398e-06, "loss": 2.67110538482666, "step": 68040 }, { "epoch": 0.5493086218448052, "grad_norm": 0.7791191339492798, "learning_rate": 9.019813736339185e-06, "loss": 2.682409477233887, "step": 68050 }, { "epoch": 0.5493893431705722, "grad_norm": 1.1875056028366089, "learning_rate": 9.018198266600971e-06, "loss": 2.4850431442260743, "step": 68060 }, { "epoch": 0.5494700644963393, "grad_norm": 0.8148719072341919, "learning_rate": 9.016582796862759e-06, "loss": 2.9878469467163087, "step": 68070 }, { "epoch": 0.5495507858221064, "grad_norm": 1.6797350645065308, "learning_rate": 9.014967327124545e-06, "loss": 3.8786319732666015, "step": 68080 }, { "epoch": 0.5496315071478733, "grad_norm": 0.5970274806022644, "learning_rate": 9.013351857386333e-06, "loss": 2.8182016372680665, "step": 68090 }, { "epoch": 0.5497122284736404, "grad_norm": 0.7358610033988953, "learning_rate": 9.011736387648119e-06, "loss": 2.7010772705078123, "step": 68100 }, { "epoch": 0.5497929497994075, "grad_norm": 0.821807324886322, "learning_rate": 9.010120917909906e-06, "loss": 2.778568458557129, "step": 68110 }, { "epoch": 0.5498736711251746, "grad_norm": 0.7116708755493164, "learning_rate": 9.008505448171692e-06, "loss": 2.805987739562988, "step": 68120 }, { "epoch": 0.5499543924509416, "grad_norm": 1.1544603109359741, "learning_rate": 9.00688997843348e-06, "loss": 2.540417289733887, "step": 68130 }, { "epoch": 0.5500351137767087, "grad_norm": 0.9610567688941956, "learning_rate": 9.005274508695266e-06, "loss": 2.5695066452026367, "step": 68140 }, { "epoch": 0.5501158351024757, "grad_norm": 0.826495885848999, "learning_rate": 9.003659038957054e-06, "loss": 3.0384237289428713, "step": 68150 }, { "epoch": 0.5501965564282427, "grad_norm": 0.8280348181724548, "learning_rate": 9.00204356921884e-06, "loss": 2.6875328063964843, "step": 68160 }, { "epoch": 0.5502772777540098, "grad_norm": 0.9600370526313782, "learning_rate": 9.000428099480627e-06, "loss": 3.071222686767578, "step": 68170 }, { "epoch": 0.5503579990797769, "grad_norm": 1.3946385383605957, "learning_rate": 8.998812629742413e-06, "loss": 4.05182991027832, "step": 68180 }, { "epoch": 0.550438720405544, "grad_norm": 1.2246003150939941, "learning_rate": 8.997197160004201e-06, "loss": 2.7558305740356444, "step": 68190 }, { "epoch": 0.550519441731311, "grad_norm": 0.9337326884269714, "learning_rate": 8.995581690265987e-06, "loss": 3.0291128158569336, "step": 68200 }, { "epoch": 0.550600163057078, "grad_norm": 1.0631054639816284, "learning_rate": 8.993966220527775e-06, "loss": 2.7678361892700196, "step": 68210 }, { "epoch": 0.5506808843828451, "grad_norm": 0.6890244483947754, "learning_rate": 8.992350750789561e-06, "loss": 2.4855745315551756, "step": 68220 }, { "epoch": 0.5507616057086122, "grad_norm": 0.7485345602035522, "learning_rate": 8.990735281051349e-06, "loss": 2.9377004623413088, "step": 68230 }, { "epoch": 0.5508423270343792, "grad_norm": 0.6072964668273926, "learning_rate": 8.989119811313135e-06, "loss": 3.1243265151977537, "step": 68240 }, { "epoch": 0.5509230483601463, "grad_norm": 0.6783725619316101, "learning_rate": 8.987504341574922e-06, "loss": 2.649277114868164, "step": 68250 }, { "epoch": 0.5510037696859134, "grad_norm": 0.8094569444656372, "learning_rate": 8.985888871836708e-06, "loss": 2.5687128067016602, "step": 68260 }, { "epoch": 0.5510844910116803, "grad_norm": 0.7741521596908569, "learning_rate": 8.984273402098496e-06, "loss": 2.900868606567383, "step": 68270 }, { "epoch": 0.5511652123374474, "grad_norm": 0.8451604843139648, "learning_rate": 8.982657932360282e-06, "loss": 3.1114322662353517, "step": 68280 }, { "epoch": 0.5512459336632145, "grad_norm": 1.3533333539962769, "learning_rate": 8.98104246262207e-06, "loss": 2.6767528533935545, "step": 68290 }, { "epoch": 0.5513266549889816, "grad_norm": 1.2492327690124512, "learning_rate": 8.979426992883856e-06, "loss": 2.514153480529785, "step": 68300 }, { "epoch": 0.5514073763147486, "grad_norm": 0.9143707156181335, "learning_rate": 8.977811523145643e-06, "loss": 2.665667724609375, "step": 68310 }, { "epoch": 0.5514880976405157, "grad_norm": 0.8537277579307556, "learning_rate": 8.97619605340743e-06, "loss": 2.5956344604492188, "step": 68320 }, { "epoch": 0.5515688189662827, "grad_norm": 0.9322079420089722, "learning_rate": 8.974580583669217e-06, "loss": 2.528798294067383, "step": 68330 }, { "epoch": 0.5516495402920497, "grad_norm": 0.6696037650108337, "learning_rate": 8.972965113931003e-06, "loss": 2.7986621856689453, "step": 68340 }, { "epoch": 0.5517302616178168, "grad_norm": 0.8666760325431824, "learning_rate": 8.97134964419279e-06, "loss": 2.7370542526245116, "step": 68350 }, { "epoch": 0.5518109829435839, "grad_norm": 0.8777284622192383, "learning_rate": 8.969734174454577e-06, "loss": 2.856829071044922, "step": 68360 }, { "epoch": 0.551891704269351, "grad_norm": 0.8364585041999817, "learning_rate": 8.968118704716364e-06, "loss": 2.8813507080078127, "step": 68370 }, { "epoch": 0.5519724255951179, "grad_norm": 2.377246141433716, "learning_rate": 8.96650323497815e-06, "loss": 2.786186408996582, "step": 68380 }, { "epoch": 0.552053146920885, "grad_norm": 0.8762518763542175, "learning_rate": 8.964887765239938e-06, "loss": 2.800217628479004, "step": 68390 }, { "epoch": 0.5521338682466521, "grad_norm": 2.124077320098877, "learning_rate": 8.963272295501724e-06, "loss": 2.989437294006348, "step": 68400 }, { "epoch": 0.5522145895724191, "grad_norm": 1.8051878213882446, "learning_rate": 8.961656825763512e-06, "loss": 2.9754833221435546, "step": 68410 }, { "epoch": 0.5522953108981862, "grad_norm": 1.088865041732788, "learning_rate": 8.960041356025298e-06, "loss": 2.6154596328735353, "step": 68420 }, { "epoch": 0.5523760322239533, "grad_norm": 1.436716079711914, "learning_rate": 8.958425886287085e-06, "loss": 2.5008235931396485, "step": 68430 }, { "epoch": 0.5524567535497203, "grad_norm": 0.7822259068489075, "learning_rate": 8.956810416548871e-06, "loss": 2.367584228515625, "step": 68440 }, { "epoch": 0.5525374748754873, "grad_norm": 0.8069871068000793, "learning_rate": 8.955194946810659e-06, "loss": 2.732004737854004, "step": 68450 }, { "epoch": 0.5526181962012544, "grad_norm": 0.6239734888076782, "learning_rate": 8.953579477072447e-06, "loss": 2.517543601989746, "step": 68460 }, { "epoch": 0.5526989175270215, "grad_norm": 1.1705145835876465, "learning_rate": 8.951964007334233e-06, "loss": 2.719125747680664, "step": 68470 }, { "epoch": 0.5527796388527885, "grad_norm": 1.0034699440002441, "learning_rate": 8.95034853759602e-06, "loss": 2.5009782791137694, "step": 68480 }, { "epoch": 0.5528603601785556, "grad_norm": 0.7870101928710938, "learning_rate": 8.948733067857807e-06, "loss": 2.6832937240600585, "step": 68490 }, { "epoch": 0.5529410815043226, "grad_norm": 0.624028742313385, "learning_rate": 8.947117598119594e-06, "loss": 2.6922670364379884, "step": 68500 }, { "epoch": 0.5530218028300897, "grad_norm": 0.7682995796203613, "learning_rate": 8.94550212838138e-06, "loss": 2.643082046508789, "step": 68510 }, { "epoch": 0.5531025241558567, "grad_norm": 1.243777871131897, "learning_rate": 8.943886658643168e-06, "loss": 2.636848258972168, "step": 68520 }, { "epoch": 0.5531832454816238, "grad_norm": 0.9211108684539795, "learning_rate": 8.942271188904954e-06, "loss": 2.3405216217041014, "step": 68530 }, { "epoch": 0.5532639668073909, "grad_norm": 0.7446728348731995, "learning_rate": 8.940655719166742e-06, "loss": 2.634966278076172, "step": 68540 }, { "epoch": 0.5533446881331578, "grad_norm": 0.8734778165817261, "learning_rate": 8.939040249428528e-06, "loss": 2.908854866027832, "step": 68550 }, { "epoch": 0.5534254094589249, "grad_norm": 1.0577392578125, "learning_rate": 8.937424779690315e-06, "loss": 2.734014892578125, "step": 68560 }, { "epoch": 0.553506130784692, "grad_norm": 1.033473014831543, "learning_rate": 8.935809309952103e-06, "loss": 2.399601173400879, "step": 68570 }, { "epoch": 0.5535868521104591, "grad_norm": 0.9764602780342102, "learning_rate": 8.934193840213889e-06, "loss": 2.5759550094604493, "step": 68580 }, { "epoch": 0.5536675734362261, "grad_norm": 1.4112082719802856, "learning_rate": 8.932578370475677e-06, "loss": 2.9129043579101563, "step": 68590 }, { "epoch": 0.5537482947619932, "grad_norm": 0.8515361547470093, "learning_rate": 8.930962900737463e-06, "loss": 2.542300796508789, "step": 68600 }, { "epoch": 0.5538290160877603, "grad_norm": 1.2026772499084473, "learning_rate": 8.92934743099925e-06, "loss": 2.462432289123535, "step": 68610 }, { "epoch": 0.5539097374135272, "grad_norm": 0.6601575016975403, "learning_rate": 8.927731961261036e-06, "loss": 3.142300605773926, "step": 68620 }, { "epoch": 0.5539904587392943, "grad_norm": 0.8442374467849731, "learning_rate": 8.926116491522824e-06, "loss": 2.50419921875, "step": 68630 }, { "epoch": 0.5540711800650614, "grad_norm": 0.879165530204773, "learning_rate": 8.92450102178461e-06, "loss": 2.9480941772460936, "step": 68640 }, { "epoch": 0.5541519013908285, "grad_norm": 1.612297773361206, "learning_rate": 8.922885552046398e-06, "loss": 2.7363521575927736, "step": 68650 }, { "epoch": 0.5542326227165955, "grad_norm": 0.8091497421264648, "learning_rate": 8.921270082308184e-06, "loss": 2.5793256759643555, "step": 68660 }, { "epoch": 0.5543133440423625, "grad_norm": 0.8653298020362854, "learning_rate": 8.919654612569971e-06, "loss": 2.8224359512329102, "step": 68670 }, { "epoch": 0.5543940653681296, "grad_norm": 3.31577205657959, "learning_rate": 8.918039142831757e-06, "loss": 3.2822040557861327, "step": 68680 }, { "epoch": 0.5544747866938967, "grad_norm": 1.4300745725631714, "learning_rate": 8.916423673093545e-06, "loss": 2.640229034423828, "step": 68690 }, { "epoch": 0.5545555080196637, "grad_norm": 1.247196912765503, "learning_rate": 8.914808203355331e-06, "loss": 3.0047754287719726, "step": 68700 }, { "epoch": 0.5546362293454308, "grad_norm": 0.7231185436248779, "learning_rate": 8.913192733617119e-06, "loss": 2.7247323989868164, "step": 68710 }, { "epoch": 0.5547169506711979, "grad_norm": 0.977504312992096, "learning_rate": 8.911577263878905e-06, "loss": 2.404830551147461, "step": 68720 }, { "epoch": 0.5547976719969648, "grad_norm": 1.3627289533615112, "learning_rate": 8.909961794140693e-06, "loss": 2.4938526153564453, "step": 68730 }, { "epoch": 0.5548783933227319, "grad_norm": 0.7815677523612976, "learning_rate": 8.908346324402479e-06, "loss": 2.9239938735961912, "step": 68740 }, { "epoch": 0.554959114648499, "grad_norm": 1.1061527729034424, "learning_rate": 8.906730854664266e-06, "loss": 2.651065444946289, "step": 68750 }, { "epoch": 0.5550398359742661, "grad_norm": 1.1571069955825806, "learning_rate": 8.905115384926052e-06, "loss": 2.3808385848999025, "step": 68760 }, { "epoch": 0.5551205573000331, "grad_norm": 1.3975197076797485, "learning_rate": 8.90349991518784e-06, "loss": 2.593484115600586, "step": 68770 }, { "epoch": 0.5552012786258002, "grad_norm": 1.4076261520385742, "learning_rate": 8.901884445449626e-06, "loss": 2.6668066024780273, "step": 68780 }, { "epoch": 0.5552819999515672, "grad_norm": 1.3367241621017456, "learning_rate": 8.900268975711414e-06, "loss": 2.26806755065918, "step": 68790 }, { "epoch": 0.5553627212773342, "grad_norm": 1.2499973773956299, "learning_rate": 8.8986535059732e-06, "loss": 3.4059017181396483, "step": 68800 }, { "epoch": 0.5554434426031013, "grad_norm": 1.5284972190856934, "learning_rate": 8.897038036234987e-06, "loss": 2.832452583312988, "step": 68810 }, { "epoch": 0.5555241639288684, "grad_norm": 0.9157561659812927, "learning_rate": 8.895422566496775e-06, "loss": 2.5010194778442383, "step": 68820 }, { "epoch": 0.5556048852546355, "grad_norm": 1.3173414468765259, "learning_rate": 8.893807096758561e-06, "loss": 2.7098875045776367, "step": 68830 }, { "epoch": 0.5556856065804024, "grad_norm": 1.0285224914550781, "learning_rate": 8.892191627020349e-06, "loss": 2.872201156616211, "step": 68840 }, { "epoch": 0.5557663279061695, "grad_norm": 0.846528947353363, "learning_rate": 8.890576157282135e-06, "loss": 2.665414047241211, "step": 68850 }, { "epoch": 0.5558470492319366, "grad_norm": 1.0530729293823242, "learning_rate": 8.888960687543922e-06, "loss": 2.7584823608398437, "step": 68860 }, { "epoch": 0.5559277705577036, "grad_norm": 0.8362921476364136, "learning_rate": 8.887345217805708e-06, "loss": 2.786329650878906, "step": 68870 }, { "epoch": 0.5560084918834707, "grad_norm": 1.319419503211975, "learning_rate": 8.885729748067496e-06, "loss": 2.94699764251709, "step": 68880 }, { "epoch": 0.5560892132092378, "grad_norm": 1.4797831773757935, "learning_rate": 8.884114278329282e-06, "loss": 2.9400096893310548, "step": 68890 }, { "epoch": 0.5561699345350049, "grad_norm": 0.98296058177948, "learning_rate": 8.88249880859107e-06, "loss": 2.699439811706543, "step": 68900 }, { "epoch": 0.5562506558607718, "grad_norm": 1.028140902519226, "learning_rate": 8.880883338852856e-06, "loss": 2.452855110168457, "step": 68910 }, { "epoch": 0.5563313771865389, "grad_norm": 1.0682117938995361, "learning_rate": 8.879267869114643e-06, "loss": 2.3759910583496096, "step": 68920 }, { "epoch": 0.556412098512306, "grad_norm": 0.8862724304199219, "learning_rate": 8.87765239937643e-06, "loss": 2.3881900787353514, "step": 68930 }, { "epoch": 0.556492819838073, "grad_norm": 0.7430771589279175, "learning_rate": 8.876036929638217e-06, "loss": 2.657524299621582, "step": 68940 }, { "epoch": 0.5565735411638401, "grad_norm": 0.8731399774551392, "learning_rate": 8.874421459900003e-06, "loss": 2.594661903381348, "step": 68950 }, { "epoch": 0.5566542624896071, "grad_norm": 2.64945650100708, "learning_rate": 8.87280599016179e-06, "loss": 3.511203384399414, "step": 68960 }, { "epoch": 0.5567349838153742, "grad_norm": 0.5222543478012085, "learning_rate": 8.871190520423577e-06, "loss": 2.6869861602783205, "step": 68970 }, { "epoch": 0.5568157051411412, "grad_norm": 0.7350373864173889, "learning_rate": 8.869575050685365e-06, "loss": 2.830305480957031, "step": 68980 }, { "epoch": 0.5568964264669083, "grad_norm": 0.8966012597084045, "learning_rate": 8.86795958094715e-06, "loss": 2.4471853256225584, "step": 68990 }, { "epoch": 0.5569771477926754, "grad_norm": 0.9666773080825806, "learning_rate": 8.866344111208938e-06, "loss": 2.4339471817016602, "step": 69000 }, { "epoch": 0.5570578691184424, "grad_norm": 0.752396821975708, "learning_rate": 8.864728641470724e-06, "loss": 2.9059165954589843, "step": 69010 }, { "epoch": 0.5571385904442094, "grad_norm": 1.4665818214416504, "learning_rate": 8.863113171732512e-06, "loss": 2.5931882858276367, "step": 69020 }, { "epoch": 0.5572193117699765, "grad_norm": 0.7242111563682556, "learning_rate": 8.861497701994298e-06, "loss": 3.2445945739746094, "step": 69030 }, { "epoch": 0.5573000330957436, "grad_norm": 0.7686972618103027, "learning_rate": 8.859882232256086e-06, "loss": 2.4262004852294923, "step": 69040 }, { "epoch": 0.5573807544215106, "grad_norm": 0.8558059930801392, "learning_rate": 8.858266762517872e-06, "loss": 2.7307357788085938, "step": 69050 }, { "epoch": 0.5574614757472777, "grad_norm": 0.7598464488983154, "learning_rate": 8.85665129277966e-06, "loss": 3.0498085021972656, "step": 69060 }, { "epoch": 0.5575421970730448, "grad_norm": 0.9277040362358093, "learning_rate": 8.855035823041445e-06, "loss": 2.4084651947021483, "step": 69070 }, { "epoch": 0.5576229183988117, "grad_norm": 0.9130436778068542, "learning_rate": 8.853420353303233e-06, "loss": 2.6635692596435545, "step": 69080 }, { "epoch": 0.5577036397245788, "grad_norm": 1.1427247524261475, "learning_rate": 8.851804883565019e-06, "loss": 2.5186548233032227, "step": 69090 }, { "epoch": 0.5577843610503459, "grad_norm": 1.1994469165802002, "learning_rate": 8.850189413826807e-06, "loss": 2.8307117462158202, "step": 69100 }, { "epoch": 0.557865082376113, "grad_norm": 1.4393212795257568, "learning_rate": 8.848573944088593e-06, "loss": 3.784463119506836, "step": 69110 }, { "epoch": 0.55794580370188, "grad_norm": 1.2590705156326294, "learning_rate": 8.84695847435038e-06, "loss": 2.967340850830078, "step": 69120 }, { "epoch": 0.558026525027647, "grad_norm": 0.8790159821510315, "learning_rate": 8.845343004612166e-06, "loss": 2.7351306915283202, "step": 69130 }, { "epoch": 0.5581072463534141, "grad_norm": 1.3820416927337646, "learning_rate": 8.843727534873954e-06, "loss": 3.306249237060547, "step": 69140 }, { "epoch": 0.5581879676791812, "grad_norm": 1.328086256980896, "learning_rate": 8.84211206513574e-06, "loss": 2.6384693145751954, "step": 69150 }, { "epoch": 0.5582686890049482, "grad_norm": 0.8257951736450195, "learning_rate": 8.840496595397528e-06, "loss": 2.9809032440185548, "step": 69160 }, { "epoch": 0.5583494103307153, "grad_norm": 1.1334888935089111, "learning_rate": 8.838881125659314e-06, "loss": 3.1093658447265624, "step": 69170 }, { "epoch": 0.5584301316564824, "grad_norm": 0.8343296647071838, "learning_rate": 8.837265655921101e-06, "loss": 2.6630741119384767, "step": 69180 }, { "epoch": 0.5585108529822493, "grad_norm": 0.7567329406738281, "learning_rate": 8.835650186182887e-06, "loss": 2.6861431121826174, "step": 69190 }, { "epoch": 0.5585915743080164, "grad_norm": 1.0967352390289307, "learning_rate": 8.834034716444675e-06, "loss": 2.7799278259277345, "step": 69200 }, { "epoch": 0.5586722956337835, "grad_norm": 1.4256618022918701, "learning_rate": 8.832419246706461e-06, "loss": 2.381102752685547, "step": 69210 }, { "epoch": 0.5587530169595506, "grad_norm": 0.6182551383972168, "learning_rate": 8.830803776968249e-06, "loss": 2.8960641860961913, "step": 69220 }, { "epoch": 0.5588337382853176, "grad_norm": 1.337871789932251, "learning_rate": 8.829188307230035e-06, "loss": 2.4801483154296875, "step": 69230 }, { "epoch": 0.5589144596110847, "grad_norm": 0.6564591526985168, "learning_rate": 8.827572837491823e-06, "loss": 2.5917325973510743, "step": 69240 }, { "epoch": 0.5589951809368517, "grad_norm": 0.7189070582389832, "learning_rate": 8.825957367753609e-06, "loss": 3.158804512023926, "step": 69250 }, { "epoch": 0.5590759022626187, "grad_norm": 1.94998300075531, "learning_rate": 8.824341898015396e-06, "loss": 2.7115829467773436, "step": 69260 }, { "epoch": 0.5591566235883858, "grad_norm": 1.522640347480774, "learning_rate": 8.822726428277182e-06, "loss": 2.9645952224731444, "step": 69270 }, { "epoch": 0.5592373449141529, "grad_norm": 1.113831877708435, "learning_rate": 8.82111095853897e-06, "loss": 2.827356147766113, "step": 69280 }, { "epoch": 0.55931806623992, "grad_norm": 0.6865177154541016, "learning_rate": 8.819495488800756e-06, "loss": 2.722677230834961, "step": 69290 }, { "epoch": 0.559398787565687, "grad_norm": 0.8616926074028015, "learning_rate": 8.817880019062544e-06, "loss": 2.81732234954834, "step": 69300 }, { "epoch": 0.559479508891454, "grad_norm": 0.8698227405548096, "learning_rate": 8.81626454932433e-06, "loss": 2.8291593551635743, "step": 69310 }, { "epoch": 0.5595602302172211, "grad_norm": 0.9279578924179077, "learning_rate": 8.814649079586117e-06, "loss": 2.647258186340332, "step": 69320 }, { "epoch": 0.5596409515429881, "grad_norm": 0.8092362284660339, "learning_rate": 8.813033609847905e-06, "loss": 2.396136474609375, "step": 69330 }, { "epoch": 0.5597216728687552, "grad_norm": 0.6696781516075134, "learning_rate": 8.811418140109691e-06, "loss": 2.5418128967285156, "step": 69340 }, { "epoch": 0.5598023941945223, "grad_norm": 0.8379492163658142, "learning_rate": 8.809802670371479e-06, "loss": 2.787540626525879, "step": 69350 }, { "epoch": 0.5598831155202894, "grad_norm": 1.390145182609558, "learning_rate": 8.808187200633265e-06, "loss": 2.7793294906616213, "step": 69360 }, { "epoch": 0.5599638368460563, "grad_norm": 0.7498407959938049, "learning_rate": 8.806571730895052e-06, "loss": 2.5186092376708986, "step": 69370 }, { "epoch": 0.5600445581718234, "grad_norm": 1.2543209791183472, "learning_rate": 8.804956261156838e-06, "loss": 2.518184471130371, "step": 69380 }, { "epoch": 0.5601252794975905, "grad_norm": 1.0185314416885376, "learning_rate": 8.803340791418626e-06, "loss": 3.126900863647461, "step": 69390 }, { "epoch": 0.5602060008233575, "grad_norm": 0.8751082420349121, "learning_rate": 8.801725321680412e-06, "loss": 2.7360084533691404, "step": 69400 }, { "epoch": 0.5602867221491246, "grad_norm": 0.8032041192054749, "learning_rate": 8.8001098519422e-06, "loss": 2.716495132446289, "step": 69410 }, { "epoch": 0.5603674434748916, "grad_norm": 1.2302606105804443, "learning_rate": 8.798494382203986e-06, "loss": 2.4202245712280273, "step": 69420 }, { "epoch": 0.5604481648006587, "grad_norm": 1.2479183673858643, "learning_rate": 8.796878912465773e-06, "loss": 2.5013790130615234, "step": 69430 }, { "epoch": 0.5605288861264257, "grad_norm": 0.8240470290184021, "learning_rate": 8.79526344272756e-06, "loss": 2.912079429626465, "step": 69440 }, { "epoch": 0.5606096074521928, "grad_norm": 1.4350762367248535, "learning_rate": 8.793647972989347e-06, "loss": 2.6226797103881836, "step": 69450 }, { "epoch": 0.5606903287779599, "grad_norm": 0.9881635904312134, "learning_rate": 8.792032503251133e-06, "loss": 2.297444725036621, "step": 69460 }, { "epoch": 0.5607710501037269, "grad_norm": 1.8173729181289673, "learning_rate": 8.79041703351292e-06, "loss": 2.459422492980957, "step": 69470 }, { "epoch": 0.5608517714294939, "grad_norm": 1.1653664112091064, "learning_rate": 8.788801563774707e-06, "loss": 2.615503692626953, "step": 69480 }, { "epoch": 0.560932492755261, "grad_norm": 0.6570237278938293, "learning_rate": 8.787186094036494e-06, "loss": 3.0261688232421875, "step": 69490 }, { "epoch": 0.5610132140810281, "grad_norm": 0.6548423767089844, "learning_rate": 8.78557062429828e-06, "loss": 3.2930816650390624, "step": 69500 }, { "epoch": 0.5610939354067951, "grad_norm": 1.774915099143982, "learning_rate": 8.783955154560068e-06, "loss": 2.646867561340332, "step": 69510 }, { "epoch": 0.5611746567325622, "grad_norm": 1.075457215309143, "learning_rate": 8.782339684821854e-06, "loss": 3.150398826599121, "step": 69520 }, { "epoch": 0.5612553780583293, "grad_norm": 0.9154182076454163, "learning_rate": 8.780724215083642e-06, "loss": 3.023557662963867, "step": 69530 }, { "epoch": 0.5613360993840962, "grad_norm": 0.723503828048706, "learning_rate": 8.779108745345428e-06, "loss": 2.780537223815918, "step": 69540 }, { "epoch": 0.5614168207098633, "grad_norm": 0.8653856515884399, "learning_rate": 8.777493275607216e-06, "loss": 2.5680185317993165, "step": 69550 }, { "epoch": 0.5614975420356304, "grad_norm": 0.9728602170944214, "learning_rate": 8.775877805869002e-06, "loss": 2.527903366088867, "step": 69560 }, { "epoch": 0.5615782633613975, "grad_norm": 1.2797623872756958, "learning_rate": 8.77426233613079e-06, "loss": 2.721633529663086, "step": 69570 }, { "epoch": 0.5616589846871645, "grad_norm": 0.9059458374977112, "learning_rate": 8.772646866392575e-06, "loss": 3.0788864135742187, "step": 69580 }, { "epoch": 0.5617397060129316, "grad_norm": 0.767794668674469, "learning_rate": 8.771031396654363e-06, "loss": 2.7039613723754883, "step": 69590 }, { "epoch": 0.5618204273386986, "grad_norm": 0.7947110533714294, "learning_rate": 8.769415926916149e-06, "loss": 3.108237648010254, "step": 69600 }, { "epoch": 0.5619011486644656, "grad_norm": 1.1729841232299805, "learning_rate": 8.767800457177937e-06, "loss": 2.843811798095703, "step": 69610 }, { "epoch": 0.5619818699902327, "grad_norm": 0.9611645936965942, "learning_rate": 8.766184987439723e-06, "loss": 2.8026973724365236, "step": 69620 }, { "epoch": 0.5620625913159998, "grad_norm": 0.7054030299186707, "learning_rate": 8.76456951770151e-06, "loss": 2.576084518432617, "step": 69630 }, { "epoch": 0.5621433126417669, "grad_norm": 0.7402308583259583, "learning_rate": 8.762954047963296e-06, "loss": 2.392243576049805, "step": 69640 }, { "epoch": 0.5622240339675338, "grad_norm": 0.7312794923782349, "learning_rate": 8.761338578225084e-06, "loss": 3.106257438659668, "step": 69650 }, { "epoch": 0.5623047552933009, "grad_norm": 0.7943443655967712, "learning_rate": 8.75972310848687e-06, "loss": 2.7249717712402344, "step": 69660 }, { "epoch": 0.562385476619068, "grad_norm": 0.8751308917999268, "learning_rate": 8.758107638748658e-06, "loss": 3.475240707397461, "step": 69670 }, { "epoch": 0.5624661979448351, "grad_norm": 0.7918875217437744, "learning_rate": 8.756492169010444e-06, "loss": 2.5615732192993166, "step": 69680 }, { "epoch": 0.5625469192706021, "grad_norm": 0.720585286617279, "learning_rate": 8.754876699272231e-06, "loss": 2.455985450744629, "step": 69690 }, { "epoch": 0.5626276405963692, "grad_norm": 1.6293138265609741, "learning_rate": 8.753261229534017e-06, "loss": 2.9280534744262696, "step": 69700 }, { "epoch": 0.5627083619221362, "grad_norm": 0.875110387802124, "learning_rate": 8.751645759795805e-06, "loss": 2.6366851806640623, "step": 69710 }, { "epoch": 0.5627890832479032, "grad_norm": 1.6256685256958008, "learning_rate": 8.750030290057591e-06, "loss": 2.619727897644043, "step": 69720 }, { "epoch": 0.5628698045736703, "grad_norm": 0.7265613079071045, "learning_rate": 8.748414820319379e-06, "loss": 2.5498498916625976, "step": 69730 }, { "epoch": 0.5629505258994374, "grad_norm": 0.7501372694969177, "learning_rate": 8.746799350581165e-06, "loss": 2.7723066329956056, "step": 69740 }, { "epoch": 0.5630312472252045, "grad_norm": 0.9253152012825012, "learning_rate": 8.745183880842952e-06, "loss": 2.9963268280029296, "step": 69750 }, { "epoch": 0.5631119685509715, "grad_norm": 0.6027212738990784, "learning_rate": 8.743568411104738e-06, "loss": 2.807143211364746, "step": 69760 }, { "epoch": 0.5631926898767385, "grad_norm": 1.1062064170837402, "learning_rate": 8.741952941366526e-06, "loss": 2.5753103256225587, "step": 69770 }, { "epoch": 0.5632734112025056, "grad_norm": 1.4196763038635254, "learning_rate": 8.740337471628312e-06, "loss": 2.411296081542969, "step": 69780 }, { "epoch": 0.5633541325282726, "grad_norm": 0.7100703120231628, "learning_rate": 8.7387220018901e-06, "loss": 2.740334892272949, "step": 69790 }, { "epoch": 0.5634348538540397, "grad_norm": 1.1727288961410522, "learning_rate": 8.737106532151886e-06, "loss": 3.3425331115722656, "step": 69800 }, { "epoch": 0.5635155751798068, "grad_norm": 0.6434983015060425, "learning_rate": 8.735491062413674e-06, "loss": 2.4354623794555663, "step": 69810 }, { "epoch": 0.5635962965055739, "grad_norm": 1.139117956161499, "learning_rate": 8.73387559267546e-06, "loss": 2.3893802642822264, "step": 69820 }, { "epoch": 0.5636770178313408, "grad_norm": 0.8392848372459412, "learning_rate": 8.732260122937249e-06, "loss": 2.7902040481567383, "step": 69830 }, { "epoch": 0.5637577391571079, "grad_norm": 1.555674433708191, "learning_rate": 8.730644653199035e-06, "loss": 2.402145576477051, "step": 69840 }, { "epoch": 0.563838460482875, "grad_norm": 1.068200945854187, "learning_rate": 8.729029183460823e-06, "loss": 3.1735694885253904, "step": 69850 }, { "epoch": 0.563919181808642, "grad_norm": 1.3719826936721802, "learning_rate": 8.727413713722609e-06, "loss": 2.803030586242676, "step": 69860 }, { "epoch": 0.5639999031344091, "grad_norm": 0.8544766902923584, "learning_rate": 8.725798243984396e-06, "loss": 2.3146486282348633, "step": 69870 }, { "epoch": 0.5640806244601761, "grad_norm": 0.8712292909622192, "learning_rate": 8.724182774246182e-06, "loss": 2.7589244842529297, "step": 69880 }, { "epoch": 0.5641613457859432, "grad_norm": 0.5880495309829712, "learning_rate": 8.72256730450797e-06, "loss": 2.4977724075317385, "step": 69890 }, { "epoch": 0.5642420671117102, "grad_norm": 1.862489104270935, "learning_rate": 8.720951834769756e-06, "loss": 3.457975387573242, "step": 69900 }, { "epoch": 0.5643227884374773, "grad_norm": 1.6701372861862183, "learning_rate": 8.719336365031544e-06, "loss": 3.2128746032714846, "step": 69910 }, { "epoch": 0.5644035097632444, "grad_norm": 1.4664983749389648, "learning_rate": 8.71772089529333e-06, "loss": 3.358843994140625, "step": 69920 }, { "epoch": 0.5644842310890114, "grad_norm": 0.8099707365036011, "learning_rate": 8.716105425555117e-06, "loss": 2.5354265213012694, "step": 69930 }, { "epoch": 0.5645649524147784, "grad_norm": 0.6489706039428711, "learning_rate": 8.714489955816903e-06, "loss": 2.724095344543457, "step": 69940 }, { "epoch": 0.5646456737405455, "grad_norm": 1.6554808616638184, "learning_rate": 8.712874486078691e-06, "loss": 2.6434656143188477, "step": 69950 }, { "epoch": 0.5647263950663126, "grad_norm": 1.2253469228744507, "learning_rate": 8.711259016340477e-06, "loss": 2.4430126190185546, "step": 69960 }, { "epoch": 0.5648071163920796, "grad_norm": 0.9159449338912964, "learning_rate": 8.709643546602265e-06, "loss": 2.3662435531616213, "step": 69970 }, { "epoch": 0.5648878377178467, "grad_norm": 1.1776914596557617, "learning_rate": 8.70802807686405e-06, "loss": 2.8082298278808593, "step": 69980 }, { "epoch": 0.5649685590436138, "grad_norm": 0.7671461701393127, "learning_rate": 8.706412607125838e-06, "loss": 2.2544300079345705, "step": 69990 }, { "epoch": 0.5650492803693807, "grad_norm": 1.5165408849716187, "learning_rate": 8.704797137387624e-06, "loss": 2.7937255859375, "step": 70000 }, { "epoch": 0.5651300016951478, "grad_norm": 0.6749463677406311, "learning_rate": 8.703181667649412e-06, "loss": 2.5899274826049803, "step": 70010 }, { "epoch": 0.5652107230209149, "grad_norm": 0.8687059283256531, "learning_rate": 8.701566197911198e-06, "loss": 2.647169494628906, "step": 70020 }, { "epoch": 0.565291444346682, "grad_norm": 1.1112639904022217, "learning_rate": 8.699950728172986e-06, "loss": 2.8736455917358397, "step": 70030 }, { "epoch": 0.565372165672449, "grad_norm": 0.7157689929008484, "learning_rate": 8.698335258434772e-06, "loss": 2.673432540893555, "step": 70040 }, { "epoch": 0.565452886998216, "grad_norm": 0.9677110314369202, "learning_rate": 8.69671978869656e-06, "loss": 2.8065475463867187, "step": 70050 }, { "epoch": 0.5655336083239831, "grad_norm": 1.0598814487457275, "learning_rate": 8.695104318958346e-06, "loss": 2.7722959518432617, "step": 70060 }, { "epoch": 0.5656143296497501, "grad_norm": 0.9939298629760742, "learning_rate": 8.693488849220133e-06, "loss": 2.378040885925293, "step": 70070 }, { "epoch": 0.5656950509755172, "grad_norm": 1.2395291328430176, "learning_rate": 8.69187337948192e-06, "loss": 2.883759689331055, "step": 70080 }, { "epoch": 0.5657757723012843, "grad_norm": 0.9498328566551208, "learning_rate": 8.690257909743707e-06, "loss": 2.594162178039551, "step": 70090 }, { "epoch": 0.5658564936270514, "grad_norm": 0.6842402219772339, "learning_rate": 8.688642440005493e-06, "loss": 2.6118616104125976, "step": 70100 }, { "epoch": 0.5659372149528183, "grad_norm": 1.2285538911819458, "learning_rate": 8.68702697026728e-06, "loss": 2.644623947143555, "step": 70110 }, { "epoch": 0.5660179362785854, "grad_norm": 0.7567952871322632, "learning_rate": 8.685411500529067e-06, "loss": 2.5251880645751954, "step": 70120 }, { "epoch": 0.5660986576043525, "grad_norm": 0.6338270306587219, "learning_rate": 8.683796030790854e-06, "loss": 2.519650459289551, "step": 70130 }, { "epoch": 0.5661793789301196, "grad_norm": 0.8360531330108643, "learning_rate": 8.68218056105264e-06, "loss": 2.4879194259643556, "step": 70140 }, { "epoch": 0.5662601002558866, "grad_norm": 2.9381632804870605, "learning_rate": 8.680565091314428e-06, "loss": 3.102207565307617, "step": 70150 }, { "epoch": 0.5663408215816537, "grad_norm": 1.5127365589141846, "learning_rate": 8.678949621576214e-06, "loss": 2.4756443023681642, "step": 70160 }, { "epoch": 0.5664215429074207, "grad_norm": 1.0445623397827148, "learning_rate": 8.677334151838002e-06, "loss": 2.560200309753418, "step": 70170 }, { "epoch": 0.5665022642331877, "grad_norm": 0.5820647478103638, "learning_rate": 8.675718682099788e-06, "loss": 2.7984439849853517, "step": 70180 }, { "epoch": 0.5665829855589548, "grad_norm": 0.824671745300293, "learning_rate": 8.674103212361575e-06, "loss": 2.485113334655762, "step": 70190 }, { "epoch": 0.5666637068847219, "grad_norm": 1.4206852912902832, "learning_rate": 8.672487742623363e-06, "loss": 2.4186878204345703, "step": 70200 }, { "epoch": 0.566744428210489, "grad_norm": 1.8747172355651855, "learning_rate": 8.670872272885149e-06, "loss": 2.565887451171875, "step": 70210 }, { "epoch": 0.566825149536256, "grad_norm": 0.6992209553718567, "learning_rate": 8.669256803146937e-06, "loss": 2.518600845336914, "step": 70220 }, { "epoch": 0.566905870862023, "grad_norm": 0.9828473329544067, "learning_rate": 8.667641333408723e-06, "loss": 2.8206401824951173, "step": 70230 }, { "epoch": 0.5669865921877901, "grad_norm": 1.0431180000305176, "learning_rate": 8.66602586367051e-06, "loss": 2.3714569091796873, "step": 70240 }, { "epoch": 0.5670673135135571, "grad_norm": 1.1622960567474365, "learning_rate": 8.664410393932296e-06, "loss": 2.759322738647461, "step": 70250 }, { "epoch": 0.5671480348393242, "grad_norm": 0.9474912881851196, "learning_rate": 8.662794924194084e-06, "loss": 2.3414424896240233, "step": 70260 }, { "epoch": 0.5672287561650913, "grad_norm": 1.128210186958313, "learning_rate": 8.66117945445587e-06, "loss": 2.6330326080322264, "step": 70270 }, { "epoch": 0.5673094774908584, "grad_norm": 0.780776858329773, "learning_rate": 8.659563984717658e-06, "loss": 2.914566421508789, "step": 70280 }, { "epoch": 0.5673901988166253, "grad_norm": 0.9013010263442993, "learning_rate": 8.657948514979444e-06, "loss": 2.4985193252563476, "step": 70290 }, { "epoch": 0.5674709201423924, "grad_norm": 0.5848103761672974, "learning_rate": 8.656333045241232e-06, "loss": 2.5285150527954103, "step": 70300 }, { "epoch": 0.5675516414681595, "grad_norm": 1.0910693407058716, "learning_rate": 8.654717575503018e-06, "loss": 2.8859226226806642, "step": 70310 }, { "epoch": 0.5676323627939265, "grad_norm": 1.2888967990875244, "learning_rate": 8.653102105764805e-06, "loss": 3.1108381271362306, "step": 70320 }, { "epoch": 0.5677130841196936, "grad_norm": 0.8447852730751038, "learning_rate": 8.651486636026591e-06, "loss": 2.6885683059692385, "step": 70330 }, { "epoch": 0.5677938054454607, "grad_norm": 1.3177785873413086, "learning_rate": 8.649871166288379e-06, "loss": 3.061619758605957, "step": 70340 }, { "epoch": 0.5678745267712277, "grad_norm": 0.7608953714370728, "learning_rate": 8.648255696550165e-06, "loss": 2.707319641113281, "step": 70350 }, { "epoch": 0.5679552480969947, "grad_norm": 0.8966322541236877, "learning_rate": 8.646640226811953e-06, "loss": 2.600930404663086, "step": 70360 }, { "epoch": 0.5680359694227618, "grad_norm": 0.7241640686988831, "learning_rate": 8.645024757073739e-06, "loss": 2.5804912567138674, "step": 70370 }, { "epoch": 0.5681166907485289, "grad_norm": 0.8045690655708313, "learning_rate": 8.643409287335526e-06, "loss": 2.8704160690307616, "step": 70380 }, { "epoch": 0.5681974120742959, "grad_norm": 2.0690572261810303, "learning_rate": 8.641793817597312e-06, "loss": 1.943891716003418, "step": 70390 }, { "epoch": 0.568278133400063, "grad_norm": 1.2326154708862305, "learning_rate": 8.6401783478591e-06, "loss": 2.6505346298217773, "step": 70400 }, { "epoch": 0.56835885472583, "grad_norm": 0.73372882604599, "learning_rate": 8.638562878120886e-06, "loss": 2.634976387023926, "step": 70410 }, { "epoch": 0.5684395760515971, "grad_norm": 0.6421201825141907, "learning_rate": 8.636947408382674e-06, "loss": 2.469908905029297, "step": 70420 }, { "epoch": 0.5685202973773641, "grad_norm": 1.3046015501022339, "learning_rate": 8.63533193864446e-06, "loss": 2.6337907791137694, "step": 70430 }, { "epoch": 0.5686010187031312, "grad_norm": 0.6879579424858093, "learning_rate": 8.633716468906247e-06, "loss": 3.1156379699707033, "step": 70440 }, { "epoch": 0.5686817400288983, "grad_norm": 1.0662201642990112, "learning_rate": 8.632100999168033e-06, "loss": 2.613593864440918, "step": 70450 }, { "epoch": 0.5687624613546652, "grad_norm": 0.8870927095413208, "learning_rate": 8.630485529429821e-06, "loss": 2.5411190032958983, "step": 70460 }, { "epoch": 0.5688431826804323, "grad_norm": 0.8752979040145874, "learning_rate": 8.628870059691607e-06, "loss": 2.945201301574707, "step": 70470 }, { "epoch": 0.5689239040061994, "grad_norm": 0.7250199913978577, "learning_rate": 8.627254589953395e-06, "loss": 2.571285438537598, "step": 70480 }, { "epoch": 0.5690046253319665, "grad_norm": 0.5452528595924377, "learning_rate": 8.62563912021518e-06, "loss": 2.590117835998535, "step": 70490 }, { "epoch": 0.5690853466577335, "grad_norm": 0.8752773404121399, "learning_rate": 8.624023650476968e-06, "loss": 3.1533391952514647, "step": 70500 }, { "epoch": 0.5691660679835006, "grad_norm": 0.554500937461853, "learning_rate": 8.622408180738754e-06, "loss": 2.7337457656860353, "step": 70510 }, { "epoch": 0.5692467893092676, "grad_norm": 1.0355242490768433, "learning_rate": 8.620792711000542e-06, "loss": 2.7882055282592773, "step": 70520 }, { "epoch": 0.5693275106350346, "grad_norm": 0.703609049320221, "learning_rate": 8.619177241262328e-06, "loss": 2.8817047119140624, "step": 70530 }, { "epoch": 0.5694082319608017, "grad_norm": 1.5271880626678467, "learning_rate": 8.617561771524116e-06, "loss": 3.440016174316406, "step": 70540 }, { "epoch": 0.5694889532865688, "grad_norm": 0.687994658946991, "learning_rate": 8.615946301785902e-06, "loss": 2.680006408691406, "step": 70550 }, { "epoch": 0.5695696746123359, "grad_norm": 1.2755403518676758, "learning_rate": 8.61433083204769e-06, "loss": 2.905594062805176, "step": 70560 }, { "epoch": 0.5696503959381028, "grad_norm": 1.0467627048492432, "learning_rate": 8.612715362309476e-06, "loss": 2.549333953857422, "step": 70570 }, { "epoch": 0.5697311172638699, "grad_norm": 0.9668877124786377, "learning_rate": 8.611099892571263e-06, "loss": 2.527004432678223, "step": 70580 }, { "epoch": 0.569811838589637, "grad_norm": 0.7881115078926086, "learning_rate": 8.60948442283305e-06, "loss": 2.378710174560547, "step": 70590 }, { "epoch": 0.569892559915404, "grad_norm": 0.7026617527008057, "learning_rate": 8.607868953094837e-06, "loss": 2.815200996398926, "step": 70600 }, { "epoch": 0.5699732812411711, "grad_norm": 1.213921308517456, "learning_rate": 8.606253483356623e-06, "loss": 2.603243827819824, "step": 70610 }, { "epoch": 0.5700540025669382, "grad_norm": 0.8678907155990601, "learning_rate": 8.60463801361841e-06, "loss": 2.5051103591918946, "step": 70620 }, { "epoch": 0.5701347238927053, "grad_norm": 1.0685914754867554, "learning_rate": 8.603022543880197e-06, "loss": 2.5898590087890625, "step": 70630 }, { "epoch": 0.5702154452184722, "grad_norm": 1.2696936130523682, "learning_rate": 8.601407074141984e-06, "loss": 2.8733795166015623, "step": 70640 }, { "epoch": 0.5702961665442393, "grad_norm": 1.2244137525558472, "learning_rate": 8.59979160440377e-06, "loss": 2.8757680892944335, "step": 70650 }, { "epoch": 0.5703768878700064, "grad_norm": 0.6630085110664368, "learning_rate": 8.598176134665558e-06, "loss": 2.8866397857666017, "step": 70660 }, { "epoch": 0.5704576091957735, "grad_norm": 0.8215787410736084, "learning_rate": 8.596560664927344e-06, "loss": 2.8116683959960938, "step": 70670 }, { "epoch": 0.5705383305215405, "grad_norm": 0.6944597363471985, "learning_rate": 8.594945195189132e-06, "loss": 2.3554080963134765, "step": 70680 }, { "epoch": 0.5706190518473075, "grad_norm": 1.4737486839294434, "learning_rate": 8.593329725450918e-06, "loss": 2.6410591125488283, "step": 70690 }, { "epoch": 0.5706997731730746, "grad_norm": 1.1688750982284546, "learning_rate": 8.591714255712705e-06, "loss": 2.8943464279174806, "step": 70700 }, { "epoch": 0.5707804944988416, "grad_norm": 2.1629867553710938, "learning_rate": 8.590098785974491e-06, "loss": 3.5888744354248048, "step": 70710 }, { "epoch": 0.5708612158246087, "grad_norm": 0.8536109328269958, "learning_rate": 8.588483316236279e-06, "loss": 3.3467124938964843, "step": 70720 }, { "epoch": 0.5709419371503758, "grad_norm": 1.2024767398834229, "learning_rate": 8.586867846498067e-06, "loss": 2.513399124145508, "step": 70730 }, { "epoch": 0.5710226584761429, "grad_norm": 0.7614394426345825, "learning_rate": 8.585252376759853e-06, "loss": 3.070068359375, "step": 70740 }, { "epoch": 0.5711033798019098, "grad_norm": 1.0826184749603271, "learning_rate": 8.58363690702164e-06, "loss": 2.6588485717773436, "step": 70750 }, { "epoch": 0.5711841011276769, "grad_norm": 1.1892551183700562, "learning_rate": 8.582021437283426e-06, "loss": 2.589252471923828, "step": 70760 }, { "epoch": 0.571264822453444, "grad_norm": 0.6928769946098328, "learning_rate": 8.580405967545214e-06, "loss": 3.335498809814453, "step": 70770 }, { "epoch": 0.571345543779211, "grad_norm": 1.2847156524658203, "learning_rate": 8.578790497807e-06, "loss": 2.622482681274414, "step": 70780 }, { "epoch": 0.5714262651049781, "grad_norm": 0.561610221862793, "learning_rate": 8.577175028068788e-06, "loss": 3.0145429611206054, "step": 70790 }, { "epoch": 0.5715069864307452, "grad_norm": 0.8028357028961182, "learning_rate": 8.575559558330574e-06, "loss": 2.8338687896728514, "step": 70800 }, { "epoch": 0.5715877077565122, "grad_norm": 2.272930145263672, "learning_rate": 8.573944088592362e-06, "loss": 2.998872184753418, "step": 70810 }, { "epoch": 0.5716684290822792, "grad_norm": 1.7325814962387085, "learning_rate": 8.572328618854148e-06, "loss": 2.6196022033691406, "step": 70820 }, { "epoch": 0.5717491504080463, "grad_norm": 1.5151783227920532, "learning_rate": 8.570713149115935e-06, "loss": 2.52750186920166, "step": 70830 }, { "epoch": 0.5718298717338134, "grad_norm": 0.6839366555213928, "learning_rate": 8.569097679377721e-06, "loss": 2.7138418197631835, "step": 70840 }, { "epoch": 0.5719105930595804, "grad_norm": 0.9162713885307312, "learning_rate": 8.567482209639509e-06, "loss": 2.377191734313965, "step": 70850 }, { "epoch": 0.5719913143853474, "grad_norm": 1.1420340538024902, "learning_rate": 8.565866739901295e-06, "loss": 2.5714492797851562, "step": 70860 }, { "epoch": 0.5720720357111145, "grad_norm": 1.1925817728042603, "learning_rate": 8.564251270163083e-06, "loss": 2.735685920715332, "step": 70870 }, { "epoch": 0.5721527570368816, "grad_norm": 0.9435027837753296, "learning_rate": 8.562635800424869e-06, "loss": 2.6719644546508787, "step": 70880 }, { "epoch": 0.5722334783626486, "grad_norm": 0.8991585373878479, "learning_rate": 8.561020330686656e-06, "loss": 2.949668121337891, "step": 70890 }, { "epoch": 0.5723141996884157, "grad_norm": 1.476405382156372, "learning_rate": 8.559404860948442e-06, "loss": 2.7996912002563477, "step": 70900 }, { "epoch": 0.5723949210141828, "grad_norm": 0.6108872294425964, "learning_rate": 8.55778939121023e-06, "loss": 2.5581069946289063, "step": 70910 }, { "epoch": 0.5724756423399497, "grad_norm": 0.6714274287223816, "learning_rate": 8.556173921472016e-06, "loss": 2.7933984756469727, "step": 70920 }, { "epoch": 0.5725563636657168, "grad_norm": 1.3816941976547241, "learning_rate": 8.554558451733804e-06, "loss": 3.173944282531738, "step": 70930 }, { "epoch": 0.5726370849914839, "grad_norm": 1.0409560203552246, "learning_rate": 8.55294298199559e-06, "loss": 2.8427677154541016, "step": 70940 }, { "epoch": 0.572717806317251, "grad_norm": 1.0428940057754517, "learning_rate": 8.551327512257377e-06, "loss": 2.9585031509399413, "step": 70950 }, { "epoch": 0.572798527643018, "grad_norm": 0.7285772562026978, "learning_rate": 8.549712042519163e-06, "loss": 2.517208480834961, "step": 70960 }, { "epoch": 0.5728792489687851, "grad_norm": 0.7330382466316223, "learning_rate": 8.548096572780951e-06, "loss": 2.9835412979125975, "step": 70970 }, { "epoch": 0.5729599702945521, "grad_norm": 0.8213006854057312, "learning_rate": 8.546481103042737e-06, "loss": 2.347883415222168, "step": 70980 }, { "epoch": 0.5730406916203191, "grad_norm": 1.0416141748428345, "learning_rate": 8.544865633304525e-06, "loss": 2.5994247436523437, "step": 70990 }, { "epoch": 0.5731214129460862, "grad_norm": 0.9470089077949524, "learning_rate": 8.54325016356631e-06, "loss": 2.709563064575195, "step": 71000 }, { "epoch": 0.5732021342718533, "grad_norm": 0.46362465620040894, "learning_rate": 8.541634693828098e-06, "loss": 2.5483400344848635, "step": 71010 }, { "epoch": 0.5732828555976204, "grad_norm": 1.0360729694366455, "learning_rate": 8.540019224089884e-06, "loss": 2.4443058013916015, "step": 71020 }, { "epoch": 0.5733635769233874, "grad_norm": 0.8667279481887817, "learning_rate": 8.538403754351672e-06, "loss": 2.6952320098876954, "step": 71030 }, { "epoch": 0.5734442982491544, "grad_norm": 0.7536954879760742, "learning_rate": 8.536788284613458e-06, "loss": 2.5612512588500977, "step": 71040 }, { "epoch": 0.5735250195749215, "grad_norm": 1.0241092443466187, "learning_rate": 8.535172814875246e-06, "loss": 2.7880929946899413, "step": 71050 }, { "epoch": 0.5736057409006885, "grad_norm": 0.8401640057563782, "learning_rate": 8.533557345137032e-06, "loss": 2.6110204696655273, "step": 71060 }, { "epoch": 0.5736864622264556, "grad_norm": 0.7319850921630859, "learning_rate": 8.53194187539882e-06, "loss": 2.149139976501465, "step": 71070 }, { "epoch": 0.5737671835522227, "grad_norm": 1.0098506212234497, "learning_rate": 8.530326405660607e-06, "loss": 2.8885412216186523, "step": 71080 }, { "epoch": 0.5738479048779898, "grad_norm": 0.6556792259216309, "learning_rate": 8.528710935922395e-06, "loss": 3.342525863647461, "step": 71090 }, { "epoch": 0.5739286262037567, "grad_norm": 1.0458691120147705, "learning_rate": 8.527095466184181e-06, "loss": 2.5990447998046875, "step": 71100 }, { "epoch": 0.5740093475295238, "grad_norm": 1.4255186319351196, "learning_rate": 8.525479996445969e-06, "loss": 2.7873592376708984, "step": 71110 }, { "epoch": 0.5740900688552909, "grad_norm": 1.000389814376831, "learning_rate": 8.523864526707755e-06, "loss": 2.845492362976074, "step": 71120 }, { "epoch": 0.574170790181058, "grad_norm": 1.0777243375778198, "learning_rate": 8.522249056969542e-06, "loss": 2.9889984130859375, "step": 71130 }, { "epoch": 0.574251511506825, "grad_norm": 0.8074893355369568, "learning_rate": 8.520633587231328e-06, "loss": 2.8178829193115233, "step": 71140 }, { "epoch": 0.574332232832592, "grad_norm": 0.9592083692550659, "learning_rate": 8.519018117493116e-06, "loss": 3.110280990600586, "step": 71150 }, { "epoch": 0.5744129541583591, "grad_norm": 1.2565982341766357, "learning_rate": 8.517402647754902e-06, "loss": 2.9438282012939454, "step": 71160 }, { "epoch": 0.5744936754841261, "grad_norm": 0.9995409846305847, "learning_rate": 8.51578717801669e-06, "loss": 2.612152099609375, "step": 71170 }, { "epoch": 0.5745743968098932, "grad_norm": 1.1707826852798462, "learning_rate": 8.514171708278476e-06, "loss": 2.791557502746582, "step": 71180 }, { "epoch": 0.5746551181356603, "grad_norm": 0.8367659449577332, "learning_rate": 8.512556238540263e-06, "loss": 2.7316762924194338, "step": 71190 }, { "epoch": 0.5747358394614274, "grad_norm": 0.9127318263053894, "learning_rate": 8.51094076880205e-06, "loss": 2.7415103912353516, "step": 71200 }, { "epoch": 0.5748165607871943, "grad_norm": 1.602075457572937, "learning_rate": 8.509325299063837e-06, "loss": 2.3511474609375, "step": 71210 }, { "epoch": 0.5748972821129614, "grad_norm": 1.1406508684158325, "learning_rate": 8.507709829325623e-06, "loss": 2.2491329193115233, "step": 71220 }, { "epoch": 0.5749780034387285, "grad_norm": 0.7460692524909973, "learning_rate": 8.50609435958741e-06, "loss": 2.5551288604736326, "step": 71230 }, { "epoch": 0.5750587247644955, "grad_norm": 1.2764397859573364, "learning_rate": 8.504478889849197e-06, "loss": 2.4169601440429687, "step": 71240 }, { "epoch": 0.5751394460902626, "grad_norm": 0.5439231395721436, "learning_rate": 8.502863420110984e-06, "loss": 2.4895652770996093, "step": 71250 }, { "epoch": 0.5752201674160297, "grad_norm": 1.6311321258544922, "learning_rate": 8.50124795037277e-06, "loss": 3.00234317779541, "step": 71260 }, { "epoch": 0.5753008887417967, "grad_norm": 1.5484287738800049, "learning_rate": 8.499632480634558e-06, "loss": 2.654132270812988, "step": 71270 }, { "epoch": 0.5753816100675637, "grad_norm": 0.9670373201370239, "learning_rate": 8.498017010896344e-06, "loss": 2.8510757446289063, "step": 71280 }, { "epoch": 0.5754623313933308, "grad_norm": 0.8669227361679077, "learning_rate": 8.496401541158132e-06, "loss": 3.1703296661376954, "step": 71290 }, { "epoch": 0.5755430527190979, "grad_norm": 0.64058518409729, "learning_rate": 8.494786071419918e-06, "loss": 2.7812593460083006, "step": 71300 }, { "epoch": 0.5756237740448649, "grad_norm": 0.8221001625061035, "learning_rate": 8.493170601681705e-06, "loss": 3.3774883270263674, "step": 71310 }, { "epoch": 0.575704495370632, "grad_norm": 0.8294082880020142, "learning_rate": 8.491555131943491e-06, "loss": 2.3724851608276367, "step": 71320 }, { "epoch": 0.575785216696399, "grad_norm": 1.5627530813217163, "learning_rate": 8.48993966220528e-06, "loss": 2.941431427001953, "step": 71330 }, { "epoch": 0.5758659380221661, "grad_norm": 1.0493046045303345, "learning_rate": 8.488324192467065e-06, "loss": 3.1177579879760744, "step": 71340 }, { "epoch": 0.5759466593479331, "grad_norm": 0.8790988922119141, "learning_rate": 8.486708722728853e-06, "loss": 2.755732536315918, "step": 71350 }, { "epoch": 0.5760273806737002, "grad_norm": 1.3854146003723145, "learning_rate": 8.485093252990639e-06, "loss": 2.6051441192626954, "step": 71360 }, { "epoch": 0.5761081019994673, "grad_norm": 0.7155079245567322, "learning_rate": 8.483477783252427e-06, "loss": 3.03267822265625, "step": 71370 }, { "epoch": 0.5761888233252342, "grad_norm": 0.6913570761680603, "learning_rate": 8.481862313514213e-06, "loss": 2.478913688659668, "step": 71380 }, { "epoch": 0.5762695446510013, "grad_norm": 1.2314738035202026, "learning_rate": 8.480246843776e-06, "loss": 2.499867630004883, "step": 71390 }, { "epoch": 0.5763502659767684, "grad_norm": 0.8179033994674683, "learning_rate": 8.478631374037786e-06, "loss": 2.8350147247314452, "step": 71400 }, { "epoch": 0.5764309873025355, "grad_norm": 0.7705984115600586, "learning_rate": 8.477015904299574e-06, "loss": 2.7105884552001953, "step": 71410 }, { "epoch": 0.5765117086283025, "grad_norm": 0.9496265053749084, "learning_rate": 8.47540043456136e-06, "loss": 2.3699409484863283, "step": 71420 }, { "epoch": 0.5765924299540696, "grad_norm": 0.6977405548095703, "learning_rate": 8.473784964823148e-06, "loss": 2.676533508300781, "step": 71430 }, { "epoch": 0.5766731512798366, "grad_norm": 0.7251043319702148, "learning_rate": 8.472169495084934e-06, "loss": 2.58435115814209, "step": 71440 }, { "epoch": 0.5767538726056036, "grad_norm": 0.6439180970191956, "learning_rate": 8.470554025346721e-06, "loss": 3.1269481658935545, "step": 71450 }, { "epoch": 0.5768345939313707, "grad_norm": 1.1631556749343872, "learning_rate": 8.468938555608507e-06, "loss": 3.066762924194336, "step": 71460 }, { "epoch": 0.5769153152571378, "grad_norm": 1.4432121515274048, "learning_rate": 8.467323085870295e-06, "loss": 3.4681339263916016, "step": 71470 }, { "epoch": 0.5769960365829049, "grad_norm": 0.9342564940452576, "learning_rate": 8.465707616132081e-06, "loss": 2.819921875, "step": 71480 }, { "epoch": 0.5770767579086719, "grad_norm": 1.034903883934021, "learning_rate": 8.464092146393869e-06, "loss": 2.698648452758789, "step": 71490 }, { "epoch": 0.5771574792344389, "grad_norm": 1.1044615507125854, "learning_rate": 8.462476676655655e-06, "loss": 2.5765499114990233, "step": 71500 }, { "epoch": 0.577238200560206, "grad_norm": 1.0514470338821411, "learning_rate": 8.460861206917442e-06, "loss": 2.9579578399658204, "step": 71510 }, { "epoch": 0.577318921885973, "grad_norm": 0.9436450600624084, "learning_rate": 8.459245737179228e-06, "loss": 2.868064117431641, "step": 71520 }, { "epoch": 0.5773996432117401, "grad_norm": 1.1410595178604126, "learning_rate": 8.457630267441016e-06, "loss": 2.484342575073242, "step": 71530 }, { "epoch": 0.5774803645375072, "grad_norm": 0.6899647116661072, "learning_rate": 8.456014797702802e-06, "loss": 2.4461339950561523, "step": 71540 }, { "epoch": 0.5775610858632743, "grad_norm": 0.7272682785987854, "learning_rate": 8.45439932796459e-06, "loss": 3.3232040405273438, "step": 71550 }, { "epoch": 0.5776418071890412, "grad_norm": 1.3245247602462769, "learning_rate": 8.452783858226376e-06, "loss": 2.9515865325927733, "step": 71560 }, { "epoch": 0.5777225285148083, "grad_norm": 1.676081657409668, "learning_rate": 8.451168388488163e-06, "loss": 2.656619071960449, "step": 71570 }, { "epoch": 0.5778032498405754, "grad_norm": 0.9909622669219971, "learning_rate": 8.44955291874995e-06, "loss": 2.615946388244629, "step": 71580 }, { "epoch": 0.5778839711663424, "grad_norm": 0.9266827702522278, "learning_rate": 8.447937449011737e-06, "loss": 2.7659807205200195, "step": 71590 }, { "epoch": 0.5779646924921095, "grad_norm": 0.8413435816764832, "learning_rate": 8.446321979273525e-06, "loss": 2.6840280532836913, "step": 71600 }, { "epoch": 0.5780454138178766, "grad_norm": 1.2820359468460083, "learning_rate": 8.444706509535311e-06, "loss": 2.494765281677246, "step": 71610 }, { "epoch": 0.5781261351436436, "grad_norm": 0.5610114336013794, "learning_rate": 8.443091039797099e-06, "loss": 2.6799385070800783, "step": 71620 }, { "epoch": 0.5782068564694106, "grad_norm": 0.6531897783279419, "learning_rate": 8.441475570058885e-06, "loss": 3.067895698547363, "step": 71630 }, { "epoch": 0.5782875777951777, "grad_norm": 0.8198985457420349, "learning_rate": 8.439860100320672e-06, "loss": 2.5261508941650392, "step": 71640 }, { "epoch": 0.5783682991209448, "grad_norm": 0.7964907288551331, "learning_rate": 8.438244630582458e-06, "loss": 2.9490119934082033, "step": 71650 }, { "epoch": 0.5784490204467119, "grad_norm": 0.8980060815811157, "learning_rate": 8.436629160844246e-06, "loss": 2.787994956970215, "step": 71660 }, { "epoch": 0.5785297417724788, "grad_norm": 1.3810913562774658, "learning_rate": 8.435013691106032e-06, "loss": 2.720753860473633, "step": 71670 }, { "epoch": 0.5786104630982459, "grad_norm": 1.072797179222107, "learning_rate": 8.43339822136782e-06, "loss": 3.328601837158203, "step": 71680 }, { "epoch": 0.578691184424013, "grad_norm": 0.8802223205566406, "learning_rate": 8.431782751629606e-06, "loss": 2.4765392303466798, "step": 71690 }, { "epoch": 0.57877190574978, "grad_norm": 1.4956010580062866, "learning_rate": 8.430167281891393e-06, "loss": 2.213337707519531, "step": 71700 }, { "epoch": 0.5788526270755471, "grad_norm": 1.056604027748108, "learning_rate": 8.42855181215318e-06, "loss": 2.4170618057250977, "step": 71710 }, { "epoch": 0.5789333484013142, "grad_norm": 1.391567587852478, "learning_rate": 8.426936342414967e-06, "loss": 3.0947866439819336, "step": 71720 }, { "epoch": 0.5790140697270812, "grad_norm": 1.0756349563598633, "learning_rate": 8.425320872676753e-06, "loss": 2.8666725158691406, "step": 71730 }, { "epoch": 0.5790947910528482, "grad_norm": 0.798129677772522, "learning_rate": 8.42370540293854e-06, "loss": 2.853541946411133, "step": 71740 }, { "epoch": 0.5791755123786153, "grad_norm": 1.0647672414779663, "learning_rate": 8.422089933200327e-06, "loss": 3.144532012939453, "step": 71750 }, { "epoch": 0.5792562337043824, "grad_norm": 0.6208239197731018, "learning_rate": 8.420474463462114e-06, "loss": 2.429662322998047, "step": 71760 }, { "epoch": 0.5793369550301494, "grad_norm": 1.6806635856628418, "learning_rate": 8.4188589937239e-06, "loss": 2.7640031814575194, "step": 71770 }, { "epoch": 0.5794176763559165, "grad_norm": 0.9125630855560303, "learning_rate": 8.417243523985688e-06, "loss": 3.0111415863037108, "step": 71780 }, { "epoch": 0.5794983976816835, "grad_norm": 0.7142729163169861, "learning_rate": 8.415628054247474e-06, "loss": 2.548041343688965, "step": 71790 }, { "epoch": 0.5795791190074506, "grad_norm": 0.7769052982330322, "learning_rate": 8.414012584509262e-06, "loss": 2.7269569396972657, "step": 71800 }, { "epoch": 0.5796598403332176, "grad_norm": 1.097635269165039, "learning_rate": 8.412397114771048e-06, "loss": 2.651723861694336, "step": 71810 }, { "epoch": 0.5797405616589847, "grad_norm": 1.020472764968872, "learning_rate": 8.410781645032835e-06, "loss": 2.4906734466552733, "step": 71820 }, { "epoch": 0.5798212829847518, "grad_norm": 0.6619230508804321, "learning_rate": 8.409166175294621e-06, "loss": 2.5079654693603515, "step": 71830 }, { "epoch": 0.5799020043105187, "grad_norm": 1.1522400379180908, "learning_rate": 8.40755070555641e-06, "loss": 2.4503080368041994, "step": 71840 }, { "epoch": 0.5799827256362858, "grad_norm": 0.8373432755470276, "learning_rate": 8.405935235818195e-06, "loss": 3.2829307556152343, "step": 71850 }, { "epoch": 0.5800634469620529, "grad_norm": 1.1363638639450073, "learning_rate": 8.404319766079983e-06, "loss": 2.733220672607422, "step": 71860 }, { "epoch": 0.58014416828782, "grad_norm": 2.618144989013672, "learning_rate": 8.402704296341769e-06, "loss": 2.5349308013916017, "step": 71870 }, { "epoch": 0.580224889613587, "grad_norm": 1.1110320091247559, "learning_rate": 8.401088826603557e-06, "loss": 2.56191463470459, "step": 71880 }, { "epoch": 0.5803056109393541, "grad_norm": 1.1112632751464844, "learning_rate": 8.399473356865343e-06, "loss": 3.038666343688965, "step": 71890 }, { "epoch": 0.5803863322651212, "grad_norm": 0.9563730955123901, "learning_rate": 8.39785788712713e-06, "loss": 2.583374786376953, "step": 71900 }, { "epoch": 0.5804670535908881, "grad_norm": 1.2640013694763184, "learning_rate": 8.396242417388916e-06, "loss": 3.207600784301758, "step": 71910 }, { "epoch": 0.5805477749166552, "grad_norm": 0.7961134910583496, "learning_rate": 8.394626947650704e-06, "loss": 3.1228336334228515, "step": 71920 }, { "epoch": 0.5806284962424223, "grad_norm": 0.8035573959350586, "learning_rate": 8.39301147791249e-06, "loss": 2.3020917892456056, "step": 71930 }, { "epoch": 0.5807092175681894, "grad_norm": 1.5317314863204956, "learning_rate": 8.391396008174278e-06, "loss": 2.792535972595215, "step": 71940 }, { "epoch": 0.5807899388939564, "grad_norm": 1.323779582977295, "learning_rate": 8.389780538436064e-06, "loss": 3.0370361328125, "step": 71950 }, { "epoch": 0.5808706602197234, "grad_norm": 1.034407138824463, "learning_rate": 8.388165068697851e-06, "loss": 2.715376281738281, "step": 71960 }, { "epoch": 0.5809513815454905, "grad_norm": 0.9329734444618225, "learning_rate": 8.386549598959637e-06, "loss": 2.8510900497436524, "step": 71970 }, { "epoch": 0.5810321028712575, "grad_norm": 2.1876721382141113, "learning_rate": 8.384934129221425e-06, "loss": 2.4959331512451173, "step": 71980 }, { "epoch": 0.5811128241970246, "grad_norm": 0.7100706696510315, "learning_rate": 8.383318659483211e-06, "loss": 2.9817312240600584, "step": 71990 }, { "epoch": 0.5811935455227917, "grad_norm": 0.5740208625793457, "learning_rate": 8.381703189744999e-06, "loss": 2.209980773925781, "step": 72000 }, { "epoch": 0.5812742668485588, "grad_norm": 1.0905457735061646, "learning_rate": 8.380087720006785e-06, "loss": 2.827814483642578, "step": 72010 }, { "epoch": 0.5813549881743257, "grad_norm": 1.1319313049316406, "learning_rate": 8.378472250268572e-06, "loss": 2.7051063537597657, "step": 72020 }, { "epoch": 0.5814357095000928, "grad_norm": 0.768062174320221, "learning_rate": 8.376856780530358e-06, "loss": 2.3582826614379884, "step": 72030 }, { "epoch": 0.5815164308258599, "grad_norm": 1.484298825263977, "learning_rate": 8.375241310792146e-06, "loss": 2.804072952270508, "step": 72040 }, { "epoch": 0.5815971521516269, "grad_norm": 1.3395941257476807, "learning_rate": 8.373625841053932e-06, "loss": 2.8419605255126954, "step": 72050 }, { "epoch": 0.581677873477394, "grad_norm": 0.9177212119102478, "learning_rate": 8.37201037131572e-06, "loss": 2.597011184692383, "step": 72060 }, { "epoch": 0.581758594803161, "grad_norm": 1.5561084747314453, "learning_rate": 8.370394901577506e-06, "loss": 2.752592086791992, "step": 72070 }, { "epoch": 0.5818393161289281, "grad_norm": 0.8748804330825806, "learning_rate": 8.368779431839293e-06, "loss": 2.6058887481689452, "step": 72080 }, { "epoch": 0.5819200374546951, "grad_norm": 1.037840723991394, "learning_rate": 8.36716396210108e-06, "loss": 3.025196075439453, "step": 72090 }, { "epoch": 0.5820007587804622, "grad_norm": 1.0153909921646118, "learning_rate": 8.365548492362867e-06, "loss": 2.6032562255859375, "step": 72100 }, { "epoch": 0.5820814801062293, "grad_norm": 0.8546334505081177, "learning_rate": 8.363933022624655e-06, "loss": 2.3812576293945313, "step": 72110 }, { "epoch": 0.5821622014319964, "grad_norm": 1.048926830291748, "learning_rate": 8.36231755288644e-06, "loss": 2.247915267944336, "step": 72120 }, { "epoch": 0.5822429227577633, "grad_norm": 1.1988165378570557, "learning_rate": 8.360702083148229e-06, "loss": 3.0173582077026366, "step": 72130 }, { "epoch": 0.5823236440835304, "grad_norm": 0.893989622592926, "learning_rate": 8.359086613410015e-06, "loss": 2.9859752655029297, "step": 72140 }, { "epoch": 0.5824043654092975, "grad_norm": 0.5581751465797424, "learning_rate": 8.357471143671802e-06, "loss": 2.988450622558594, "step": 72150 }, { "epoch": 0.5824850867350645, "grad_norm": 0.6953302025794983, "learning_rate": 8.355855673933588e-06, "loss": 2.970512580871582, "step": 72160 }, { "epoch": 0.5825658080608316, "grad_norm": 0.6005995273590088, "learning_rate": 8.354240204195376e-06, "loss": 2.6676916122436523, "step": 72170 }, { "epoch": 0.5826465293865987, "grad_norm": 1.0970686674118042, "learning_rate": 8.352624734457162e-06, "loss": 2.3500799179077148, "step": 72180 }, { "epoch": 0.5827272507123658, "grad_norm": 0.8065406680107117, "learning_rate": 8.35100926471895e-06, "loss": 2.8888452529907225, "step": 72190 }, { "epoch": 0.5828079720381327, "grad_norm": 0.697738528251648, "learning_rate": 8.349393794980736e-06, "loss": 2.692498779296875, "step": 72200 }, { "epoch": 0.5828886933638998, "grad_norm": 0.9704511165618896, "learning_rate": 8.347778325242523e-06, "loss": 2.408127021789551, "step": 72210 }, { "epoch": 0.5829694146896669, "grad_norm": 0.8968528509140015, "learning_rate": 8.34616285550431e-06, "loss": 2.4517047882080076, "step": 72220 }, { "epoch": 0.5830501360154339, "grad_norm": 0.7340400815010071, "learning_rate": 8.344547385766097e-06, "loss": 2.6587217330932615, "step": 72230 }, { "epoch": 0.583130857341201, "grad_norm": 0.8947592973709106, "learning_rate": 8.342931916027883e-06, "loss": 3.0966798782348635, "step": 72240 }, { "epoch": 0.583211578666968, "grad_norm": 0.9326271414756775, "learning_rate": 8.34131644628967e-06, "loss": 2.3590778350830077, "step": 72250 }, { "epoch": 0.5832922999927351, "grad_norm": 0.6715312600135803, "learning_rate": 8.339700976551457e-06, "loss": 2.508068656921387, "step": 72260 }, { "epoch": 0.5833730213185021, "grad_norm": 1.1202845573425293, "learning_rate": 8.338085506813244e-06, "loss": 2.8425165176391602, "step": 72270 }, { "epoch": 0.5834537426442692, "grad_norm": 1.5268832445144653, "learning_rate": 8.33647003707503e-06, "loss": 3.272664260864258, "step": 72280 }, { "epoch": 0.5835344639700363, "grad_norm": 0.8505122661590576, "learning_rate": 8.334854567336818e-06, "loss": 2.912969970703125, "step": 72290 }, { "epoch": 0.5836151852958033, "grad_norm": 2.246816396713257, "learning_rate": 8.333239097598604e-06, "loss": 2.6453989028930662, "step": 72300 }, { "epoch": 0.5836959066215703, "grad_norm": 0.6503560543060303, "learning_rate": 8.331623627860392e-06, "loss": 3.304904556274414, "step": 72310 }, { "epoch": 0.5837766279473374, "grad_norm": 1.3215529918670654, "learning_rate": 8.330008158122178e-06, "loss": 2.5117319107055662, "step": 72320 }, { "epoch": 0.5838573492731045, "grad_norm": 0.7783670425415039, "learning_rate": 8.328392688383965e-06, "loss": 2.834690284729004, "step": 72330 }, { "epoch": 0.5839380705988715, "grad_norm": 0.6204478144645691, "learning_rate": 8.326777218645753e-06, "loss": 2.630772590637207, "step": 72340 }, { "epoch": 0.5840187919246386, "grad_norm": 1.3679816722869873, "learning_rate": 8.325161748907539e-06, "loss": 2.309738349914551, "step": 72350 }, { "epoch": 0.5840995132504057, "grad_norm": 1.1049163341522217, "learning_rate": 8.323546279169327e-06, "loss": 2.982571601867676, "step": 72360 }, { "epoch": 0.5841802345761726, "grad_norm": 1.0966426134109497, "learning_rate": 8.321930809431113e-06, "loss": 2.992295265197754, "step": 72370 }, { "epoch": 0.5842609559019397, "grad_norm": 1.0180221796035767, "learning_rate": 8.3203153396929e-06, "loss": 3.4285221099853516, "step": 72380 }, { "epoch": 0.5843416772277068, "grad_norm": 1.1229579448699951, "learning_rate": 8.318699869954687e-06, "loss": 2.578676223754883, "step": 72390 }, { "epoch": 0.5844223985534739, "grad_norm": 0.9120496511459351, "learning_rate": 8.317084400216474e-06, "loss": 3.1340877532958986, "step": 72400 }, { "epoch": 0.5845031198792409, "grad_norm": 0.8624101281166077, "learning_rate": 8.31546893047826e-06, "loss": 2.619798469543457, "step": 72410 }, { "epoch": 0.584583841205008, "grad_norm": 0.9788591265678406, "learning_rate": 8.313853460740048e-06, "loss": 2.7674097061157226, "step": 72420 }, { "epoch": 0.584664562530775, "grad_norm": 0.7240126132965088, "learning_rate": 8.312237991001834e-06, "loss": 2.9115118026733398, "step": 72430 }, { "epoch": 0.584745283856542, "grad_norm": 1.3364611864089966, "learning_rate": 8.310622521263622e-06, "loss": 2.732749176025391, "step": 72440 }, { "epoch": 0.5848260051823091, "grad_norm": 0.6941379904747009, "learning_rate": 8.309007051525408e-06, "loss": 2.7026166915893555, "step": 72450 }, { "epoch": 0.5849067265080762, "grad_norm": 0.6765056252479553, "learning_rate": 8.307391581787195e-06, "loss": 2.434377670288086, "step": 72460 }, { "epoch": 0.5849874478338433, "grad_norm": 0.690703272819519, "learning_rate": 8.305776112048983e-06, "loss": 2.7779497146606444, "step": 72470 }, { "epoch": 0.5850681691596102, "grad_norm": 0.7713274955749512, "learning_rate": 8.304160642310769e-06, "loss": 2.552225875854492, "step": 72480 }, { "epoch": 0.5851488904853773, "grad_norm": 0.7544453740119934, "learning_rate": 8.302545172572557e-06, "loss": 2.2220571517944334, "step": 72490 }, { "epoch": 0.5852296118111444, "grad_norm": 0.7934626936912537, "learning_rate": 8.300929702834343e-06, "loss": 2.85916690826416, "step": 72500 }, { "epoch": 0.5853103331369114, "grad_norm": 0.6596424579620361, "learning_rate": 8.29931423309613e-06, "loss": 2.6325956344604493, "step": 72510 }, { "epoch": 0.5853910544626785, "grad_norm": 0.922244668006897, "learning_rate": 8.297698763357916e-06, "loss": 2.9846223831176757, "step": 72520 }, { "epoch": 0.5854717757884456, "grad_norm": 0.7094303369522095, "learning_rate": 8.296083293619704e-06, "loss": 3.042031097412109, "step": 72530 }, { "epoch": 0.5855524971142126, "grad_norm": 1.2887574434280396, "learning_rate": 8.29446782388149e-06, "loss": 2.957962417602539, "step": 72540 }, { "epoch": 0.5856332184399796, "grad_norm": 0.895155668258667, "learning_rate": 8.292852354143278e-06, "loss": 2.732603073120117, "step": 72550 }, { "epoch": 0.5857139397657467, "grad_norm": 0.7071639895439148, "learning_rate": 8.291236884405064e-06, "loss": 2.729692840576172, "step": 72560 }, { "epoch": 0.5857946610915138, "grad_norm": 1.161299228668213, "learning_rate": 8.289621414666851e-06, "loss": 2.533547782897949, "step": 72570 }, { "epoch": 0.5858753824172809, "grad_norm": 1.0259133577346802, "learning_rate": 8.288005944928637e-06, "loss": 2.956140327453613, "step": 72580 }, { "epoch": 0.5859561037430479, "grad_norm": 0.6161561608314514, "learning_rate": 8.286390475190425e-06, "loss": 2.782238578796387, "step": 72590 }, { "epoch": 0.5860368250688149, "grad_norm": 0.6843330264091492, "learning_rate": 8.284775005452211e-06, "loss": 2.3646188735961915, "step": 72600 }, { "epoch": 0.586117546394582, "grad_norm": 1.3639483451843262, "learning_rate": 8.283159535713999e-06, "loss": 2.714404296875, "step": 72610 }, { "epoch": 0.586198267720349, "grad_norm": 0.8159114718437195, "learning_rate": 8.281544065975785e-06, "loss": 2.870785140991211, "step": 72620 }, { "epoch": 0.5862789890461161, "grad_norm": 0.998972475528717, "learning_rate": 8.279928596237573e-06, "loss": 2.7735008239746093, "step": 72630 }, { "epoch": 0.5863597103718832, "grad_norm": 0.8430586457252502, "learning_rate": 8.278313126499359e-06, "loss": 3.0503061294555662, "step": 72640 }, { "epoch": 0.5864404316976503, "grad_norm": 1.026186466217041, "learning_rate": 8.276697656761146e-06, "loss": 2.4794506072998046, "step": 72650 }, { "epoch": 0.5865211530234172, "grad_norm": 1.0687711238861084, "learning_rate": 8.275082187022932e-06, "loss": 2.8121572494506837, "step": 72660 }, { "epoch": 0.5866018743491843, "grad_norm": 1.246535301208496, "learning_rate": 8.27346671728472e-06, "loss": 2.5191909790039064, "step": 72670 }, { "epoch": 0.5866825956749514, "grad_norm": 0.8903855681419373, "learning_rate": 8.271851247546506e-06, "loss": 2.507279396057129, "step": 72680 }, { "epoch": 0.5867633170007184, "grad_norm": 0.7360866665840149, "learning_rate": 8.270235777808294e-06, "loss": 2.9426158905029296, "step": 72690 }, { "epoch": 0.5868440383264855, "grad_norm": 1.4416720867156982, "learning_rate": 8.26862030807008e-06, "loss": 2.505938720703125, "step": 72700 }, { "epoch": 0.5869247596522525, "grad_norm": 0.6921117305755615, "learning_rate": 8.267004838331867e-06, "loss": 2.7477441787719727, "step": 72710 }, { "epoch": 0.5870054809780196, "grad_norm": 1.0822991132736206, "learning_rate": 8.265389368593653e-06, "loss": 2.9787151336669924, "step": 72720 }, { "epoch": 0.5870862023037866, "grad_norm": 0.9692882299423218, "learning_rate": 8.263773898855441e-06, "loss": 2.8447328567504884, "step": 72730 }, { "epoch": 0.5871669236295537, "grad_norm": 1.477780818939209, "learning_rate": 8.262158429117227e-06, "loss": 2.911701774597168, "step": 72740 }, { "epoch": 0.5872476449553208, "grad_norm": 0.6294071674346924, "learning_rate": 8.260542959379015e-06, "loss": 2.68813419342041, "step": 72750 }, { "epoch": 0.5873283662810878, "grad_norm": 1.046128273010254, "learning_rate": 8.2589274896408e-06, "loss": 2.8154861450195314, "step": 72760 }, { "epoch": 0.5874090876068548, "grad_norm": 0.8786543011665344, "learning_rate": 8.257312019902588e-06, "loss": 2.5122024536132814, "step": 72770 }, { "epoch": 0.5874898089326219, "grad_norm": 1.431709885597229, "learning_rate": 8.255696550164374e-06, "loss": 2.4829036712646486, "step": 72780 }, { "epoch": 0.587570530258389, "grad_norm": 1.2598459720611572, "learning_rate": 8.254081080426162e-06, "loss": 3.2042354583740233, "step": 72790 }, { "epoch": 0.587651251584156, "grad_norm": 1.4245269298553467, "learning_rate": 8.252465610687948e-06, "loss": 2.7111865997314455, "step": 72800 }, { "epoch": 0.5877319729099231, "grad_norm": 0.7146865129470825, "learning_rate": 8.250850140949736e-06, "loss": 2.4357152938842774, "step": 72810 }, { "epoch": 0.5878126942356902, "grad_norm": 0.8343328833580017, "learning_rate": 8.249234671211522e-06, "loss": 2.456800079345703, "step": 72820 }, { "epoch": 0.5878934155614571, "grad_norm": 0.8310588002204895, "learning_rate": 8.24761920147331e-06, "loss": 2.5671958923339844, "step": 72830 }, { "epoch": 0.5879741368872242, "grad_norm": 0.7299116849899292, "learning_rate": 8.246003731735095e-06, "loss": 2.7882740020751955, "step": 72840 }, { "epoch": 0.5880548582129913, "grad_norm": 1.339128851890564, "learning_rate": 8.244388261996883e-06, "loss": 2.911872673034668, "step": 72850 }, { "epoch": 0.5881355795387584, "grad_norm": 0.8460967540740967, "learning_rate": 8.242772792258669e-06, "loss": 2.798407554626465, "step": 72860 }, { "epoch": 0.5882163008645254, "grad_norm": 0.944456160068512, "learning_rate": 8.241157322520457e-06, "loss": 2.8757333755493164, "step": 72870 }, { "epoch": 0.5882970221902925, "grad_norm": 1.0018287897109985, "learning_rate": 8.239541852782243e-06, "loss": 3.0969964981079103, "step": 72880 }, { "epoch": 0.5883777435160595, "grad_norm": 0.5371373891830444, "learning_rate": 8.23792638304403e-06, "loss": 2.6884227752685548, "step": 72890 }, { "epoch": 0.5884584648418265, "grad_norm": 0.874212384223938, "learning_rate": 8.236310913305816e-06, "loss": 2.663096618652344, "step": 72900 }, { "epoch": 0.5885391861675936, "grad_norm": 1.0252742767333984, "learning_rate": 8.234695443567604e-06, "loss": 2.9532180786132813, "step": 72910 }, { "epoch": 0.5886199074933607, "grad_norm": 0.7436986565589905, "learning_rate": 8.23307997382939e-06, "loss": 2.660203552246094, "step": 72920 }, { "epoch": 0.5887006288191278, "grad_norm": 0.7838349938392639, "learning_rate": 8.231464504091178e-06, "loss": 2.929488945007324, "step": 72930 }, { "epoch": 0.5887813501448947, "grad_norm": 0.792406439781189, "learning_rate": 8.229849034352964e-06, "loss": 2.2306385040283203, "step": 72940 }, { "epoch": 0.5888620714706618, "grad_norm": 1.2847399711608887, "learning_rate": 8.228233564614752e-06, "loss": 2.740884208679199, "step": 72950 }, { "epoch": 0.5889427927964289, "grad_norm": 1.7232822179794312, "learning_rate": 8.226618094876538e-06, "loss": 2.5860681533813477, "step": 72960 }, { "epoch": 0.5890235141221959, "grad_norm": 0.8690812587738037, "learning_rate": 8.225002625138325e-06, "loss": 2.4839738845825194, "step": 72970 }, { "epoch": 0.589104235447963, "grad_norm": 1.0200976133346558, "learning_rate": 8.223387155400113e-06, "loss": 3.043750190734863, "step": 72980 }, { "epoch": 0.5891849567737301, "grad_norm": 1.0975356101989746, "learning_rate": 8.221771685661899e-06, "loss": 2.61676025390625, "step": 72990 }, { "epoch": 0.5892656780994971, "grad_norm": 0.5990383625030518, "learning_rate": 8.220156215923687e-06, "loss": 2.7453237533569337, "step": 73000 }, { "epoch": 0.5893463994252641, "grad_norm": 1.4215012788772583, "learning_rate": 8.218540746185473e-06, "loss": 3.265523910522461, "step": 73010 }, { "epoch": 0.5894271207510312, "grad_norm": 1.2383263111114502, "learning_rate": 8.21692527644726e-06, "loss": 2.350836753845215, "step": 73020 }, { "epoch": 0.5895078420767983, "grad_norm": 1.7828083038330078, "learning_rate": 8.215309806709046e-06, "loss": 2.9589609146118163, "step": 73030 }, { "epoch": 0.5895885634025653, "grad_norm": 0.6622313261032104, "learning_rate": 8.213694336970834e-06, "loss": 3.3028697967529297, "step": 73040 }, { "epoch": 0.5896692847283324, "grad_norm": 0.8635697960853577, "learning_rate": 8.21207886723262e-06, "loss": 2.983874702453613, "step": 73050 }, { "epoch": 0.5897500060540994, "grad_norm": 0.7107543349266052, "learning_rate": 8.210463397494408e-06, "loss": 2.749979591369629, "step": 73060 }, { "epoch": 0.5898307273798665, "grad_norm": 0.9881919026374817, "learning_rate": 8.208847927756194e-06, "loss": 2.5340457916259767, "step": 73070 }, { "epoch": 0.5899114487056335, "grad_norm": 0.8101471066474915, "learning_rate": 8.207232458017981e-06, "loss": 2.392490196228027, "step": 73080 }, { "epoch": 0.5899921700314006, "grad_norm": 1.258176565170288, "learning_rate": 8.205616988279767e-06, "loss": 2.4924652099609377, "step": 73090 }, { "epoch": 0.5900728913571677, "grad_norm": 0.6180135011672974, "learning_rate": 8.204001518541555e-06, "loss": 2.8573635101318358, "step": 73100 }, { "epoch": 0.5901536126829348, "grad_norm": 2.8143062591552734, "learning_rate": 8.202386048803341e-06, "loss": 3.0965339660644533, "step": 73110 }, { "epoch": 0.5902343340087017, "grad_norm": 1.1048134565353394, "learning_rate": 8.200770579065129e-06, "loss": 2.7963333129882812, "step": 73120 }, { "epoch": 0.5903150553344688, "grad_norm": 1.069899320602417, "learning_rate": 8.199155109326915e-06, "loss": 2.668191337585449, "step": 73130 }, { "epoch": 0.5903957766602359, "grad_norm": 0.7658594846725464, "learning_rate": 8.197539639588702e-06, "loss": 2.779638671875, "step": 73140 }, { "epoch": 0.5904764979860029, "grad_norm": 1.0346497297286987, "learning_rate": 8.195924169850488e-06, "loss": 3.1025352478027344, "step": 73150 }, { "epoch": 0.59055721931177, "grad_norm": 0.9728754162788391, "learning_rate": 8.194308700112276e-06, "loss": 3.3890724182128906, "step": 73160 }, { "epoch": 0.590637940637537, "grad_norm": 0.8698610067367554, "learning_rate": 8.192693230374062e-06, "loss": 2.5092626571655274, "step": 73170 }, { "epoch": 0.5907186619633041, "grad_norm": 1.4526236057281494, "learning_rate": 8.19107776063585e-06, "loss": 2.7057294845581055, "step": 73180 }, { "epoch": 0.5907993832890711, "grad_norm": 1.8609381914138794, "learning_rate": 8.189462290897636e-06, "loss": 3.5585670471191406, "step": 73190 }, { "epoch": 0.5908801046148382, "grad_norm": 0.6260614395141602, "learning_rate": 8.187846821159424e-06, "loss": 2.7656326293945312, "step": 73200 }, { "epoch": 0.5909608259406053, "grad_norm": 0.7741276621818542, "learning_rate": 8.18623135142121e-06, "loss": 3.0737592697143556, "step": 73210 }, { "epoch": 0.5910415472663723, "grad_norm": 0.9839612245559692, "learning_rate": 8.184615881682997e-06, "loss": 2.5948490142822265, "step": 73220 }, { "epoch": 0.5911222685921393, "grad_norm": 1.1657339334487915, "learning_rate": 8.183000411944783e-06, "loss": 2.9042720794677734, "step": 73230 }, { "epoch": 0.5912029899179064, "grad_norm": 0.7844077944755554, "learning_rate": 8.181384942206571e-06, "loss": 2.919716644287109, "step": 73240 }, { "epoch": 0.5912837112436735, "grad_norm": 1.0028693675994873, "learning_rate": 8.179769472468357e-06, "loss": 2.466267395019531, "step": 73250 }, { "epoch": 0.5913644325694405, "grad_norm": 1.3640090227127075, "learning_rate": 8.178154002730145e-06, "loss": 2.6365890502929688, "step": 73260 }, { "epoch": 0.5914451538952076, "grad_norm": 0.5924000144004822, "learning_rate": 8.17653853299193e-06, "loss": 3.0647262573242187, "step": 73270 }, { "epoch": 0.5915258752209747, "grad_norm": 0.8094006180763245, "learning_rate": 8.174923063253718e-06, "loss": 2.389089012145996, "step": 73280 }, { "epoch": 0.5916065965467416, "grad_norm": 0.9356892704963684, "learning_rate": 8.173307593515504e-06, "loss": 2.603598785400391, "step": 73290 }, { "epoch": 0.5916873178725087, "grad_norm": 0.7175174355506897, "learning_rate": 8.171692123777292e-06, "loss": 2.515834999084473, "step": 73300 }, { "epoch": 0.5917680391982758, "grad_norm": 0.6488808393478394, "learning_rate": 8.170076654039078e-06, "loss": 3.067308235168457, "step": 73310 }, { "epoch": 0.5918487605240429, "grad_norm": 0.8679216504096985, "learning_rate": 8.168461184300866e-06, "loss": 2.6278606414794923, "step": 73320 }, { "epoch": 0.5919294818498099, "grad_norm": 1.279923439025879, "learning_rate": 8.166845714562652e-06, "loss": 2.6750593185424805, "step": 73330 }, { "epoch": 0.592010203175577, "grad_norm": 0.8428487181663513, "learning_rate": 8.16523024482444e-06, "loss": 2.779929351806641, "step": 73340 }, { "epoch": 0.592090924501344, "grad_norm": 1.0358078479766846, "learning_rate": 8.163614775086225e-06, "loss": 3.1064661026000975, "step": 73350 }, { "epoch": 0.592171645827111, "grad_norm": 1.3092001676559448, "learning_rate": 8.161999305348013e-06, "loss": 2.8014602661132812, "step": 73360 }, { "epoch": 0.5922523671528781, "grad_norm": 1.5631382465362549, "learning_rate": 8.160383835609799e-06, "loss": 2.7098581314086916, "step": 73370 }, { "epoch": 0.5923330884786452, "grad_norm": 0.6704261302947998, "learning_rate": 8.158768365871587e-06, "loss": 2.9944501876831056, "step": 73380 }, { "epoch": 0.5924138098044123, "grad_norm": 1.2984962463378906, "learning_rate": 8.157152896133373e-06, "loss": 2.6455289840698244, "step": 73390 }, { "epoch": 0.5924945311301792, "grad_norm": 0.9433422684669495, "learning_rate": 8.15553742639516e-06, "loss": 2.7981090545654297, "step": 73400 }, { "epoch": 0.5925752524559463, "grad_norm": 1.0731385946273804, "learning_rate": 8.153921956656946e-06, "loss": 2.4850742340087892, "step": 73410 }, { "epoch": 0.5926559737817134, "grad_norm": 0.8107699155807495, "learning_rate": 8.152306486918734e-06, "loss": 2.5331148147583007, "step": 73420 }, { "epoch": 0.5927366951074804, "grad_norm": 0.7302916049957275, "learning_rate": 8.15069101718052e-06, "loss": 2.7559511184692385, "step": 73430 }, { "epoch": 0.5928174164332475, "grad_norm": 0.5295416712760925, "learning_rate": 8.149075547442308e-06, "loss": 2.851858711242676, "step": 73440 }, { "epoch": 0.5928981377590146, "grad_norm": 10.999906539916992, "learning_rate": 8.147460077704094e-06, "loss": 3.045802879333496, "step": 73450 }, { "epoch": 0.5929788590847817, "grad_norm": 0.7897850871086121, "learning_rate": 8.145844607965882e-06, "loss": 2.540592384338379, "step": 73460 }, { "epoch": 0.5930595804105486, "grad_norm": 0.8267548084259033, "learning_rate": 8.144229138227668e-06, "loss": 3.007556915283203, "step": 73470 }, { "epoch": 0.5931403017363157, "grad_norm": 1.0970169305801392, "learning_rate": 8.142613668489455e-06, "loss": 2.7829999923706055, "step": 73480 }, { "epoch": 0.5932210230620828, "grad_norm": 0.5729115605354309, "learning_rate": 8.140998198751243e-06, "loss": 2.3509284973144533, "step": 73490 }, { "epoch": 0.5933017443878498, "grad_norm": 0.7152381539344788, "learning_rate": 8.139382729013029e-06, "loss": 2.805888557434082, "step": 73500 }, { "epoch": 0.5933824657136169, "grad_norm": 0.832369327545166, "learning_rate": 8.137767259274817e-06, "loss": 2.965627670288086, "step": 73510 }, { "epoch": 0.5934631870393839, "grad_norm": 0.9455954432487488, "learning_rate": 8.136151789536603e-06, "loss": 2.5988330841064453, "step": 73520 }, { "epoch": 0.593543908365151, "grad_norm": 0.8170303702354431, "learning_rate": 8.13453631979839e-06, "loss": 3.052540397644043, "step": 73530 }, { "epoch": 0.593624629690918, "grad_norm": 1.0391836166381836, "learning_rate": 8.132920850060176e-06, "loss": 2.652126121520996, "step": 73540 }, { "epoch": 0.5937053510166851, "grad_norm": 1.1933653354644775, "learning_rate": 8.131305380321964e-06, "loss": 2.7467336654663086, "step": 73550 }, { "epoch": 0.5937860723424522, "grad_norm": 0.6355376243591309, "learning_rate": 8.12968991058375e-06, "loss": 2.4838491439819337, "step": 73560 }, { "epoch": 0.5938667936682193, "grad_norm": 0.8686370849609375, "learning_rate": 8.128074440845538e-06, "loss": 3.074054718017578, "step": 73570 }, { "epoch": 0.5939475149939862, "grad_norm": 1.0070215463638306, "learning_rate": 8.126458971107324e-06, "loss": 2.5998037338256834, "step": 73580 }, { "epoch": 0.5940282363197533, "grad_norm": 0.9669530987739563, "learning_rate": 8.124843501369111e-06, "loss": 2.802888298034668, "step": 73590 }, { "epoch": 0.5941089576455204, "grad_norm": 0.9154316186904907, "learning_rate": 8.123228031630899e-06, "loss": 2.408938980102539, "step": 73600 }, { "epoch": 0.5941896789712874, "grad_norm": 0.8575104475021362, "learning_rate": 8.121612561892685e-06, "loss": 2.821651649475098, "step": 73610 }, { "epoch": 0.5942704002970545, "grad_norm": 0.7521663904190063, "learning_rate": 8.119997092154473e-06, "loss": 2.60827579498291, "step": 73620 }, { "epoch": 0.5943511216228216, "grad_norm": 0.8844873309135437, "learning_rate": 8.118381622416259e-06, "loss": 2.7518028259277343, "step": 73630 }, { "epoch": 0.5944318429485886, "grad_norm": 1.0511205196380615, "learning_rate": 8.116766152678046e-06, "loss": 2.878429412841797, "step": 73640 }, { "epoch": 0.5945125642743556, "grad_norm": 0.8803327083587646, "learning_rate": 8.115150682939832e-06, "loss": 2.9381698608398437, "step": 73650 }, { "epoch": 0.5945932856001227, "grad_norm": 1.1239371299743652, "learning_rate": 8.11353521320162e-06, "loss": 2.7695226669311523, "step": 73660 }, { "epoch": 0.5946740069258898, "grad_norm": 0.9750775098800659, "learning_rate": 8.111919743463406e-06, "loss": 2.873258590698242, "step": 73670 }, { "epoch": 0.5947547282516568, "grad_norm": 1.3018430471420288, "learning_rate": 8.110304273725194e-06, "loss": 2.3372222900390627, "step": 73680 }, { "epoch": 0.5948354495774238, "grad_norm": 1.1869359016418457, "learning_rate": 8.10868880398698e-06, "loss": 3.1531578063964845, "step": 73690 }, { "epoch": 0.5949161709031909, "grad_norm": 1.0108133554458618, "learning_rate": 8.107073334248768e-06, "loss": 2.6239721298217775, "step": 73700 }, { "epoch": 0.594996892228958, "grad_norm": 1.2554453611373901, "learning_rate": 8.105457864510554e-06, "loss": 2.7205999374389647, "step": 73710 }, { "epoch": 0.595077613554725, "grad_norm": 0.8968347311019897, "learning_rate": 8.103842394772341e-06, "loss": 3.0509979248046877, "step": 73720 }, { "epoch": 0.5951583348804921, "grad_norm": 0.7490991353988647, "learning_rate": 8.102226925034127e-06, "loss": 2.88907413482666, "step": 73730 }, { "epoch": 0.5952390562062592, "grad_norm": 0.7518627047538757, "learning_rate": 8.100611455295915e-06, "loss": 2.7723621368408202, "step": 73740 }, { "epoch": 0.5953197775320261, "grad_norm": 0.8060529828071594, "learning_rate": 8.098995985557701e-06, "loss": 2.4686532974243165, "step": 73750 }, { "epoch": 0.5954004988577932, "grad_norm": 0.8820107579231262, "learning_rate": 8.097380515819489e-06, "loss": 2.56942138671875, "step": 73760 }, { "epoch": 0.5954812201835603, "grad_norm": 1.9088366031646729, "learning_rate": 8.095765046081275e-06, "loss": 3.3257987976074217, "step": 73770 }, { "epoch": 0.5955619415093274, "grad_norm": 0.6991567611694336, "learning_rate": 8.094149576343062e-06, "loss": 2.461170768737793, "step": 73780 }, { "epoch": 0.5956426628350944, "grad_norm": 0.9911149144172668, "learning_rate": 8.092534106604848e-06, "loss": 2.5562171936035156, "step": 73790 }, { "epoch": 0.5957233841608615, "grad_norm": 1.075400471687317, "learning_rate": 8.090918636866636e-06, "loss": 2.4261873245239256, "step": 73800 }, { "epoch": 0.5958041054866285, "grad_norm": 1.2363300323486328, "learning_rate": 8.089303167128422e-06, "loss": 3.4151901245117187, "step": 73810 }, { "epoch": 0.5958848268123955, "grad_norm": 0.9429625272750854, "learning_rate": 8.08768769739021e-06, "loss": 2.583776664733887, "step": 73820 }, { "epoch": 0.5959655481381626, "grad_norm": 1.008886456489563, "learning_rate": 8.086072227651996e-06, "loss": 2.637887382507324, "step": 73830 }, { "epoch": 0.5960462694639297, "grad_norm": 0.8383353352546692, "learning_rate": 8.084456757913783e-06, "loss": 2.6528453826904297, "step": 73840 }, { "epoch": 0.5961269907896968, "grad_norm": 0.8222419023513794, "learning_rate": 8.082841288175571e-06, "loss": 2.5205554962158203, "step": 73850 }, { "epoch": 0.5962077121154638, "grad_norm": 0.7023344039916992, "learning_rate": 8.081225818437357e-06, "loss": 2.83978328704834, "step": 73860 }, { "epoch": 0.5962884334412308, "grad_norm": 1.339795708656311, "learning_rate": 8.079610348699145e-06, "loss": 2.4999576568603517, "step": 73870 }, { "epoch": 0.5963691547669979, "grad_norm": 2.031625747680664, "learning_rate": 8.07799487896093e-06, "loss": 2.664427947998047, "step": 73880 }, { "epoch": 0.5964498760927649, "grad_norm": 0.80450439453125, "learning_rate": 8.076379409222718e-06, "loss": 2.6673700332641603, "step": 73890 }, { "epoch": 0.596530597418532, "grad_norm": 1.1255899667739868, "learning_rate": 8.074763939484504e-06, "loss": 2.836978721618652, "step": 73900 }, { "epoch": 0.5966113187442991, "grad_norm": 0.9835299253463745, "learning_rate": 8.073148469746292e-06, "loss": 2.5947938919067384, "step": 73910 }, { "epoch": 0.5966920400700662, "grad_norm": 0.8333132863044739, "learning_rate": 8.071533000008078e-06, "loss": 3.33472900390625, "step": 73920 }, { "epoch": 0.5967727613958331, "grad_norm": 0.9023205637931824, "learning_rate": 8.069917530269866e-06, "loss": 2.4734159469604493, "step": 73930 }, { "epoch": 0.5968534827216002, "grad_norm": 1.0222662687301636, "learning_rate": 8.068302060531652e-06, "loss": 2.572986602783203, "step": 73940 }, { "epoch": 0.5969342040473673, "grad_norm": 0.9741207957267761, "learning_rate": 8.06668659079344e-06, "loss": 2.9599084854125977, "step": 73950 }, { "epoch": 0.5970149253731343, "grad_norm": 1.1323716640472412, "learning_rate": 8.065071121055226e-06, "loss": 2.7268680572509765, "step": 73960 }, { "epoch": 0.5970956466989014, "grad_norm": 1.616411566734314, "learning_rate": 8.063455651317013e-06, "loss": 2.6167724609375, "step": 73970 }, { "epoch": 0.5971763680246684, "grad_norm": 0.8116227984428406, "learning_rate": 8.0618401815788e-06, "loss": 2.6488338470458985, "step": 73980 }, { "epoch": 0.5972570893504355, "grad_norm": 0.7605137825012207, "learning_rate": 8.060224711840587e-06, "loss": 2.4311525344848635, "step": 73990 }, { "epoch": 0.5973378106762025, "grad_norm": 1.1124770641326904, "learning_rate": 8.058609242102373e-06, "loss": 2.8010364532470704, "step": 74000 }, { "epoch": 0.5974185320019696, "grad_norm": 1.1129616498947144, "learning_rate": 8.05699377236416e-06, "loss": 2.823306655883789, "step": 74010 }, { "epoch": 0.5974992533277367, "grad_norm": 1.213935136795044, "learning_rate": 8.055378302625947e-06, "loss": 2.7100479125976564, "step": 74020 }, { "epoch": 0.5975799746535037, "grad_norm": 1.1389708518981934, "learning_rate": 8.053762832887734e-06, "loss": 2.651605796813965, "step": 74030 }, { "epoch": 0.5976606959792707, "grad_norm": 0.7948799729347229, "learning_rate": 8.05214736314952e-06, "loss": 2.8065540313720705, "step": 74040 }, { "epoch": 0.5977414173050378, "grad_norm": 1.0888875722885132, "learning_rate": 8.050531893411308e-06, "loss": 2.5739465713500977, "step": 74050 }, { "epoch": 0.5978221386308049, "grad_norm": 0.8500862717628479, "learning_rate": 8.048916423673094e-06, "loss": 2.7377574920654295, "step": 74060 }, { "epoch": 0.5979028599565719, "grad_norm": 1.002302885055542, "learning_rate": 8.047300953934882e-06, "loss": 2.4942148208618162, "step": 74070 }, { "epoch": 0.597983581282339, "grad_norm": 1.9026485681533813, "learning_rate": 8.045685484196668e-06, "loss": 2.844502258300781, "step": 74080 }, { "epoch": 0.5980643026081061, "grad_norm": 0.6932483911514282, "learning_rate": 8.044070014458455e-06, "loss": 2.9870346069335936, "step": 74090 }, { "epoch": 0.5981450239338731, "grad_norm": 0.4872587323188782, "learning_rate": 8.042454544720241e-06, "loss": 2.303053283691406, "step": 74100 }, { "epoch": 0.5982257452596401, "grad_norm": 0.992246150970459, "learning_rate": 8.040839074982029e-06, "loss": 2.5176456451416014, "step": 74110 }, { "epoch": 0.5983064665854072, "grad_norm": 1.5467031002044678, "learning_rate": 8.039223605243815e-06, "loss": 2.572150230407715, "step": 74120 }, { "epoch": 0.5983871879111743, "grad_norm": 0.9326387643814087, "learning_rate": 8.037608135505603e-06, "loss": 2.6206201553344726, "step": 74130 }, { "epoch": 0.5984679092369413, "grad_norm": 1.2582296133041382, "learning_rate": 8.035992665767389e-06, "loss": 2.761933517456055, "step": 74140 }, { "epoch": 0.5985486305627084, "grad_norm": 1.0101269483566284, "learning_rate": 8.034377196029176e-06, "loss": 2.639281463623047, "step": 74150 }, { "epoch": 0.5986293518884754, "grad_norm": 0.803583025932312, "learning_rate": 8.032761726290962e-06, "loss": 2.403701591491699, "step": 74160 }, { "epoch": 0.5987100732142425, "grad_norm": 1.1701699495315552, "learning_rate": 8.03114625655275e-06, "loss": 2.835105323791504, "step": 74170 }, { "epoch": 0.5987907945400095, "grad_norm": 1.0293585062026978, "learning_rate": 8.029530786814536e-06, "loss": 2.7430496215820312, "step": 74180 }, { "epoch": 0.5988715158657766, "grad_norm": 1.7710717916488647, "learning_rate": 8.027915317076324e-06, "loss": 2.7035234451293944, "step": 74190 }, { "epoch": 0.5989522371915437, "grad_norm": 1.0746514797210693, "learning_rate": 8.02629984733811e-06, "loss": 2.3791812896728515, "step": 74200 }, { "epoch": 0.5990329585173106, "grad_norm": 1.0920922756195068, "learning_rate": 8.024684377599898e-06, "loss": 3.037753868103027, "step": 74210 }, { "epoch": 0.5991136798430777, "grad_norm": 1.2650357484817505, "learning_rate": 8.023068907861684e-06, "loss": 2.813175010681152, "step": 74220 }, { "epoch": 0.5991944011688448, "grad_norm": 1.0423392057418823, "learning_rate": 8.021453438123471e-06, "loss": 2.886952590942383, "step": 74230 }, { "epoch": 0.5992751224946119, "grad_norm": 0.938653290271759, "learning_rate": 8.019837968385257e-06, "loss": 2.508859062194824, "step": 74240 }, { "epoch": 0.5993558438203789, "grad_norm": 1.1501169204711914, "learning_rate": 8.018222498647045e-06, "loss": 2.7042022705078126, "step": 74250 }, { "epoch": 0.599436565146146, "grad_norm": 1.1000816822052002, "learning_rate": 8.016607028908831e-06, "loss": 2.3073244094848633, "step": 74260 }, { "epoch": 0.599517286471913, "grad_norm": 0.6941162943840027, "learning_rate": 8.014991559170619e-06, "loss": 2.617092514038086, "step": 74270 }, { "epoch": 0.59959800779768, "grad_norm": 0.8574686050415039, "learning_rate": 8.013376089432405e-06, "loss": 2.600386619567871, "step": 74280 }, { "epoch": 0.5996787291234471, "grad_norm": 0.9842190742492676, "learning_rate": 8.011760619694192e-06, "loss": 2.9268245697021484, "step": 74290 }, { "epoch": 0.5997594504492142, "grad_norm": 0.6443807482719421, "learning_rate": 8.010145149955978e-06, "loss": 2.5395809173583985, "step": 74300 }, { "epoch": 0.5998401717749813, "grad_norm": 1.005918264389038, "learning_rate": 8.008529680217766e-06, "loss": 2.7987695693969727, "step": 74310 }, { "epoch": 0.5999208931007483, "grad_norm": 0.9027805924415588, "learning_rate": 8.006914210479552e-06, "loss": 2.5023082733154296, "step": 74320 }, { "epoch": 0.6000016144265153, "grad_norm": 0.7943449020385742, "learning_rate": 8.00529874074134e-06, "loss": 3.8558895111083986, "step": 74330 }, { "epoch": 0.6000823357522824, "grad_norm": 2.1661620140075684, "learning_rate": 8.003683271003126e-06, "loss": 2.7215721130371096, "step": 74340 }, { "epoch": 0.6001630570780494, "grad_norm": 0.7827486395835876, "learning_rate": 8.002067801264913e-06, "loss": 3.025784492492676, "step": 74350 }, { "epoch": 0.6002437784038165, "grad_norm": 0.7862641215324402, "learning_rate": 8.0004523315267e-06, "loss": 2.6703290939331055, "step": 74360 }, { "epoch": 0.6003244997295836, "grad_norm": 0.7190737724304199, "learning_rate": 7.998836861788487e-06, "loss": 2.553135108947754, "step": 74370 }, { "epoch": 0.6004052210553507, "grad_norm": 1.0014889240264893, "learning_rate": 7.997221392050275e-06, "loss": 2.680328941345215, "step": 74380 }, { "epoch": 0.6004859423811176, "grad_norm": 0.7690908312797546, "learning_rate": 7.99560592231206e-06, "loss": 2.890340042114258, "step": 74390 }, { "epoch": 0.6005666637068847, "grad_norm": 0.9931450486183167, "learning_rate": 7.993990452573848e-06, "loss": 2.8177791595458985, "step": 74400 }, { "epoch": 0.6006473850326518, "grad_norm": 1.031975269317627, "learning_rate": 7.992374982835634e-06, "loss": 2.821196174621582, "step": 74410 }, { "epoch": 0.6007281063584188, "grad_norm": 0.7569979429244995, "learning_rate": 7.990759513097422e-06, "loss": 3.052027702331543, "step": 74420 }, { "epoch": 0.6008088276841859, "grad_norm": 2.6744003295898438, "learning_rate": 7.989144043359208e-06, "loss": 3.0469593048095702, "step": 74430 }, { "epoch": 0.600889549009953, "grad_norm": 1.9539676904678345, "learning_rate": 7.987528573620996e-06, "loss": 2.5779497146606447, "step": 74440 }, { "epoch": 0.60097027033572, "grad_norm": 0.8411200046539307, "learning_rate": 7.985913103882782e-06, "loss": 2.7486284255981444, "step": 74450 }, { "epoch": 0.601050991661487, "grad_norm": 0.8070783615112305, "learning_rate": 7.98429763414457e-06, "loss": 2.9604166030883787, "step": 74460 }, { "epoch": 0.6011317129872541, "grad_norm": 0.7792062163352966, "learning_rate": 7.982682164406356e-06, "loss": 2.690473747253418, "step": 74470 }, { "epoch": 0.6012124343130212, "grad_norm": 0.9486032128334045, "learning_rate": 7.981066694668143e-06, "loss": 2.8060083389282227, "step": 74480 }, { "epoch": 0.6012931556387882, "grad_norm": 1.0729671716690063, "learning_rate": 7.97945122492993e-06, "loss": 2.8796987533569336, "step": 74490 }, { "epoch": 0.6013738769645552, "grad_norm": 0.8247706890106201, "learning_rate": 7.977835755191717e-06, "loss": 2.994398307800293, "step": 74500 }, { "epoch": 0.6014545982903223, "grad_norm": 1.082705020904541, "learning_rate": 7.976220285453503e-06, "loss": 3.1534149169921877, "step": 74510 }, { "epoch": 0.6015353196160894, "grad_norm": 0.6508690118789673, "learning_rate": 7.97460481571529e-06, "loss": 2.837840270996094, "step": 74520 }, { "epoch": 0.6016160409418564, "grad_norm": 0.8548698425292969, "learning_rate": 7.972989345977077e-06, "loss": 2.5561655044555662, "step": 74530 }, { "epoch": 0.6016967622676235, "grad_norm": 1.1743829250335693, "learning_rate": 7.971373876238864e-06, "loss": 3.1500545501708985, "step": 74540 }, { "epoch": 0.6017774835933906, "grad_norm": 0.6021761894226074, "learning_rate": 7.96975840650065e-06, "loss": 2.6564693450927734, "step": 74550 }, { "epoch": 0.6018582049191576, "grad_norm": 0.7636045217514038, "learning_rate": 7.968142936762438e-06, "loss": 2.7704389572143553, "step": 74560 }, { "epoch": 0.6019389262449246, "grad_norm": 0.8301606178283691, "learning_rate": 7.966527467024224e-06, "loss": 2.448077392578125, "step": 74570 }, { "epoch": 0.6020196475706917, "grad_norm": 0.8155203461647034, "learning_rate": 7.964911997286012e-06, "loss": 2.8696332931518556, "step": 74580 }, { "epoch": 0.6021003688964588, "grad_norm": 0.7486430406570435, "learning_rate": 7.963296527547798e-06, "loss": 2.5453100204467773, "step": 74590 }, { "epoch": 0.6021810902222258, "grad_norm": 2.2665576934814453, "learning_rate": 7.961681057809585e-06, "loss": 2.3440387725830076, "step": 74600 }, { "epoch": 0.6022618115479929, "grad_norm": 1.0756733417510986, "learning_rate": 7.960065588071371e-06, "loss": 2.640716552734375, "step": 74610 }, { "epoch": 0.6023425328737599, "grad_norm": 1.089985966682434, "learning_rate": 7.958450118333159e-06, "loss": 2.795157241821289, "step": 74620 }, { "epoch": 0.602423254199527, "grad_norm": 1.1016019582748413, "learning_rate": 7.956834648594945e-06, "loss": 2.6450384140014647, "step": 74630 }, { "epoch": 0.602503975525294, "grad_norm": 1.3073041439056396, "learning_rate": 7.955219178856733e-06, "loss": 2.6320211410522463, "step": 74640 }, { "epoch": 0.6025846968510611, "grad_norm": 0.9634758234024048, "learning_rate": 7.953603709118519e-06, "loss": 2.810148811340332, "step": 74650 }, { "epoch": 0.6026654181768282, "grad_norm": 0.8302943110466003, "learning_rate": 7.951988239380306e-06, "loss": 2.8784080505371095, "step": 74660 }, { "epoch": 0.6027461395025951, "grad_norm": 0.8988453149795532, "learning_rate": 7.950372769642092e-06, "loss": 2.5544071197509766, "step": 74670 }, { "epoch": 0.6028268608283622, "grad_norm": 0.8742359280586243, "learning_rate": 7.94875729990388e-06, "loss": 2.8894805908203125, "step": 74680 }, { "epoch": 0.6029075821541293, "grad_norm": 1.037914752960205, "learning_rate": 7.947141830165666e-06, "loss": 2.5725725173950194, "step": 74690 }, { "epoch": 0.6029883034798964, "grad_norm": 0.8869883418083191, "learning_rate": 7.945526360427454e-06, "loss": 2.6871570587158202, "step": 74700 }, { "epoch": 0.6030690248056634, "grad_norm": 0.8758140802383423, "learning_rate": 7.94391089068924e-06, "loss": 2.6983728408813477, "step": 74710 }, { "epoch": 0.6031497461314305, "grad_norm": 0.9703887701034546, "learning_rate": 7.942295420951027e-06, "loss": 2.911279296875, "step": 74720 }, { "epoch": 0.6032304674571976, "grad_norm": 1.0983893871307373, "learning_rate": 7.940679951212813e-06, "loss": 2.9136655807495115, "step": 74730 }, { "epoch": 0.6033111887829645, "grad_norm": 1.904059886932373, "learning_rate": 7.939064481474601e-06, "loss": 3.0022010803222656, "step": 74740 }, { "epoch": 0.6033919101087316, "grad_norm": 1.155027151107788, "learning_rate": 7.937449011736387e-06, "loss": 3.131788444519043, "step": 74750 }, { "epoch": 0.6034726314344987, "grad_norm": 0.7814151048660278, "learning_rate": 7.935833541998175e-06, "loss": 2.8166473388671873, "step": 74760 }, { "epoch": 0.6035533527602658, "grad_norm": 0.7682358622550964, "learning_rate": 7.934218072259961e-06, "loss": 2.7619550704956053, "step": 74770 }, { "epoch": 0.6036340740860328, "grad_norm": 1.6138012409210205, "learning_rate": 7.932602602521749e-06, "loss": 3.4342281341552736, "step": 74780 }, { "epoch": 0.6037147954117998, "grad_norm": 0.6982199549674988, "learning_rate": 7.930987132783535e-06, "loss": 2.507724571228027, "step": 74790 }, { "epoch": 0.6037955167375669, "grad_norm": 0.8248694539070129, "learning_rate": 7.929371663045322e-06, "loss": 2.8572431564331056, "step": 74800 }, { "epoch": 0.6038762380633339, "grad_norm": 0.987576961517334, "learning_rate": 7.927756193307108e-06, "loss": 2.9043020248413085, "step": 74810 }, { "epoch": 0.603956959389101, "grad_norm": 0.7116509675979614, "learning_rate": 7.926140723568896e-06, "loss": 2.6815046310424804, "step": 74820 }, { "epoch": 0.6040376807148681, "grad_norm": 0.7447063326835632, "learning_rate": 7.924525253830682e-06, "loss": 2.7637041091918944, "step": 74830 }, { "epoch": 0.6041184020406352, "grad_norm": 0.8679949045181274, "learning_rate": 7.922909784092471e-06, "loss": 2.884546661376953, "step": 74840 }, { "epoch": 0.6041991233664021, "grad_norm": 0.8365932106971741, "learning_rate": 7.921294314354257e-06, "loss": 2.734608268737793, "step": 74850 }, { "epoch": 0.6042798446921692, "grad_norm": 1.1772916316986084, "learning_rate": 7.919678844616045e-06, "loss": 2.6325990676879885, "step": 74860 }, { "epoch": 0.6043605660179363, "grad_norm": 0.6500372290611267, "learning_rate": 7.918063374877831e-06, "loss": 2.7582693099975586, "step": 74870 }, { "epoch": 0.6044412873437033, "grad_norm": 1.3946564197540283, "learning_rate": 7.916447905139619e-06, "loss": 2.604603958129883, "step": 74880 }, { "epoch": 0.6045220086694704, "grad_norm": 0.7783601880073547, "learning_rate": 7.914832435401405e-06, "loss": 3.1356718063354494, "step": 74890 }, { "epoch": 0.6046027299952375, "grad_norm": 0.8946294188499451, "learning_rate": 7.913216965663192e-06, "loss": 2.7712512969970704, "step": 74900 }, { "epoch": 0.6046834513210045, "grad_norm": 1.5325818061828613, "learning_rate": 7.911601495924978e-06, "loss": 3.333892822265625, "step": 74910 }, { "epoch": 0.6047641726467715, "grad_norm": 0.8849121928215027, "learning_rate": 7.909986026186766e-06, "loss": 2.377362823486328, "step": 74920 }, { "epoch": 0.6048448939725386, "grad_norm": 1.011807918548584, "learning_rate": 7.908370556448552e-06, "loss": 2.98028507232666, "step": 74930 }, { "epoch": 0.6049256152983057, "grad_norm": 0.8050228357315063, "learning_rate": 7.90675508671034e-06, "loss": 2.3699426651000977, "step": 74940 }, { "epoch": 0.6050063366240727, "grad_norm": 1.0522403717041016, "learning_rate": 7.905139616972126e-06, "loss": 2.6467464447021483, "step": 74950 }, { "epoch": 0.6050870579498397, "grad_norm": 0.8775122761726379, "learning_rate": 7.903524147233913e-06, "loss": 2.7273731231689453, "step": 74960 }, { "epoch": 0.6051677792756068, "grad_norm": 0.7649039626121521, "learning_rate": 7.9019086774957e-06, "loss": 2.4178625106811524, "step": 74970 }, { "epoch": 0.6052485006013739, "grad_norm": 0.8296538591384888, "learning_rate": 7.900293207757487e-06, "loss": 2.72711124420166, "step": 74980 }, { "epoch": 0.6053292219271409, "grad_norm": 1.0744684934616089, "learning_rate": 7.898677738019273e-06, "loss": 2.931590461730957, "step": 74990 }, { "epoch": 0.605409943252908, "grad_norm": 0.6373677849769592, "learning_rate": 7.897062268281061e-06, "loss": 2.5113964080810547, "step": 75000 }, { "epoch": 0.6054906645786751, "grad_norm": 0.8681395053863525, "learning_rate": 7.895446798542847e-06, "loss": 2.9710577011108397, "step": 75010 }, { "epoch": 0.6055713859044421, "grad_norm": 1.362949013710022, "learning_rate": 7.893831328804635e-06, "loss": 2.883464241027832, "step": 75020 }, { "epoch": 0.6056521072302091, "grad_norm": 0.7780911326408386, "learning_rate": 7.89221585906642e-06, "loss": 2.790719413757324, "step": 75030 }, { "epoch": 0.6057328285559762, "grad_norm": 1.618487000465393, "learning_rate": 7.890600389328208e-06, "loss": 2.710047149658203, "step": 75040 }, { "epoch": 0.6058135498817433, "grad_norm": 0.9427019953727722, "learning_rate": 7.888984919589994e-06, "loss": 2.840949058532715, "step": 75050 }, { "epoch": 0.6058942712075103, "grad_norm": 1.098590612411499, "learning_rate": 7.887369449851782e-06, "loss": 2.9320388793945313, "step": 75060 }, { "epoch": 0.6059749925332774, "grad_norm": 1.3216098546981812, "learning_rate": 7.885753980113568e-06, "loss": 2.5984386444091796, "step": 75070 }, { "epoch": 0.6060557138590444, "grad_norm": 0.6071099042892456, "learning_rate": 7.884138510375356e-06, "loss": 2.4989250183105467, "step": 75080 }, { "epoch": 0.6061364351848115, "grad_norm": 0.8472864627838135, "learning_rate": 7.882523040637142e-06, "loss": 2.7858211517333986, "step": 75090 }, { "epoch": 0.6062171565105785, "grad_norm": 0.9020171165466309, "learning_rate": 7.88090757089893e-06, "loss": 3.1758102416992187, "step": 75100 }, { "epoch": 0.6062978778363456, "grad_norm": 1.3772996664047241, "learning_rate": 7.879292101160715e-06, "loss": 2.742374801635742, "step": 75110 }, { "epoch": 0.6063785991621127, "grad_norm": 1.0797967910766602, "learning_rate": 7.877676631422503e-06, "loss": 2.729941177368164, "step": 75120 }, { "epoch": 0.6064593204878796, "grad_norm": 1.1363080739974976, "learning_rate": 7.876061161684289e-06, "loss": 2.681692886352539, "step": 75130 }, { "epoch": 0.6065400418136467, "grad_norm": 0.9602593779563904, "learning_rate": 7.874445691946077e-06, "loss": 2.485464096069336, "step": 75140 }, { "epoch": 0.6066207631394138, "grad_norm": 0.8240200281143188, "learning_rate": 7.872830222207863e-06, "loss": 2.939091110229492, "step": 75150 }, { "epoch": 0.6067014844651809, "grad_norm": 0.7849960327148438, "learning_rate": 7.87121475246965e-06, "loss": 2.798272895812988, "step": 75160 }, { "epoch": 0.6067822057909479, "grad_norm": 1.027831792831421, "learning_rate": 7.869599282731436e-06, "loss": 2.6124696731567383, "step": 75170 }, { "epoch": 0.606862927116715, "grad_norm": 2.444958209991455, "learning_rate": 7.867983812993224e-06, "loss": 2.7035783767700194, "step": 75180 }, { "epoch": 0.606943648442482, "grad_norm": 0.960895299911499, "learning_rate": 7.86636834325501e-06, "loss": 3.0520917892456056, "step": 75190 }, { "epoch": 0.607024369768249, "grad_norm": 1.2302559614181519, "learning_rate": 7.864752873516798e-06, "loss": 2.7291141510009767, "step": 75200 }, { "epoch": 0.6071050910940161, "grad_norm": 0.7931656837463379, "learning_rate": 7.863137403778584e-06, "loss": 2.7878231048583983, "step": 75210 }, { "epoch": 0.6071858124197832, "grad_norm": 1.4928256273269653, "learning_rate": 7.861521934040371e-06, "loss": 2.7931446075439452, "step": 75220 }, { "epoch": 0.6072665337455503, "grad_norm": 0.9900245666503906, "learning_rate": 7.859906464302157e-06, "loss": 3.1234277725219726, "step": 75230 }, { "epoch": 0.6073472550713173, "grad_norm": 0.9504873156547546, "learning_rate": 7.858290994563945e-06, "loss": 2.325104331970215, "step": 75240 }, { "epoch": 0.6074279763970843, "grad_norm": 1.1052387952804565, "learning_rate": 7.856675524825733e-06, "loss": 2.5379085540771484, "step": 75250 }, { "epoch": 0.6075086977228514, "grad_norm": 0.8489121794700623, "learning_rate": 7.855060055087519e-06, "loss": 2.6834381103515623, "step": 75260 }, { "epoch": 0.6075894190486184, "grad_norm": 0.6742318868637085, "learning_rate": 7.853444585349307e-06, "loss": 2.906637191772461, "step": 75270 }, { "epoch": 0.6076701403743855, "grad_norm": 1.0966931581497192, "learning_rate": 7.851829115611093e-06, "loss": 2.6399053573608398, "step": 75280 }, { "epoch": 0.6077508617001526, "grad_norm": 1.0361474752426147, "learning_rate": 7.85021364587288e-06, "loss": 2.5294378280639647, "step": 75290 }, { "epoch": 0.6078315830259197, "grad_norm": 0.902621328830719, "learning_rate": 7.848598176134666e-06, "loss": 2.197227668762207, "step": 75300 }, { "epoch": 0.6079123043516866, "grad_norm": 0.7933154702186584, "learning_rate": 7.846982706396454e-06, "loss": 2.7603044509887695, "step": 75310 }, { "epoch": 0.6079930256774537, "grad_norm": 0.5604228973388672, "learning_rate": 7.84536723665824e-06, "loss": 2.907611846923828, "step": 75320 }, { "epoch": 0.6080737470032208, "grad_norm": 1.2434055805206299, "learning_rate": 7.843751766920028e-06, "loss": 3.0005664825439453, "step": 75330 }, { "epoch": 0.6081544683289878, "grad_norm": 1.3828281164169312, "learning_rate": 7.842136297181814e-06, "loss": 2.69979248046875, "step": 75340 }, { "epoch": 0.6082351896547549, "grad_norm": 1.2326123714447021, "learning_rate": 7.840520827443601e-06, "loss": 3.0325691223144533, "step": 75350 }, { "epoch": 0.608315910980522, "grad_norm": 1.700165033340454, "learning_rate": 7.838905357705387e-06, "loss": 3.132184600830078, "step": 75360 }, { "epoch": 0.608396632306289, "grad_norm": 1.0858460664749146, "learning_rate": 7.837289887967175e-06, "loss": 2.596846008300781, "step": 75370 }, { "epoch": 0.608477353632056, "grad_norm": 1.3673683404922485, "learning_rate": 7.835674418228961e-06, "loss": 2.4510093688964845, "step": 75380 }, { "epoch": 0.6085580749578231, "grad_norm": 0.8011978268623352, "learning_rate": 7.834058948490749e-06, "loss": 2.720328521728516, "step": 75390 }, { "epoch": 0.6086387962835902, "grad_norm": 0.8597183227539062, "learning_rate": 7.832443478752535e-06, "loss": 2.848592185974121, "step": 75400 }, { "epoch": 0.6087195176093572, "grad_norm": 0.5923545360565186, "learning_rate": 7.830828009014322e-06, "loss": 2.442521858215332, "step": 75410 }, { "epoch": 0.6088002389351242, "grad_norm": 1.0385335683822632, "learning_rate": 7.829212539276108e-06, "loss": 3.0972951889038085, "step": 75420 }, { "epoch": 0.6088809602608913, "grad_norm": 0.9528780579566956, "learning_rate": 7.827597069537896e-06, "loss": 2.9448734283447267, "step": 75430 }, { "epoch": 0.6089616815866584, "grad_norm": 0.647283136844635, "learning_rate": 7.825981599799682e-06, "loss": 2.5950618743896485, "step": 75440 }, { "epoch": 0.6090424029124254, "grad_norm": 0.7155973315238953, "learning_rate": 7.82436613006147e-06, "loss": 2.6695785522460938, "step": 75450 }, { "epoch": 0.6091231242381925, "grad_norm": 1.0397237539291382, "learning_rate": 7.822750660323256e-06, "loss": 2.6432085037231445, "step": 75460 }, { "epoch": 0.6092038455639596, "grad_norm": 0.746491551399231, "learning_rate": 7.821135190585043e-06, "loss": 2.8194648742675783, "step": 75470 }, { "epoch": 0.6092845668897265, "grad_norm": 1.37045156955719, "learning_rate": 7.81951972084683e-06, "loss": 2.9810327529907226, "step": 75480 }, { "epoch": 0.6093652882154936, "grad_norm": 0.8156663179397583, "learning_rate": 7.817904251108617e-06, "loss": 2.8529165267944334, "step": 75490 }, { "epoch": 0.6094460095412607, "grad_norm": 1.0681278705596924, "learning_rate": 7.816288781370403e-06, "loss": 2.6674501419067385, "step": 75500 }, { "epoch": 0.6095267308670278, "grad_norm": 0.8903663158416748, "learning_rate": 7.81467331163219e-06, "loss": 2.8322776794433593, "step": 75510 }, { "epoch": 0.6096074521927948, "grad_norm": 1.850056529045105, "learning_rate": 7.813057841893977e-06, "loss": 2.3577709197998047, "step": 75520 }, { "epoch": 0.6096881735185619, "grad_norm": 1.4731261730194092, "learning_rate": 7.811442372155765e-06, "loss": 2.8947433471679687, "step": 75530 }, { "epoch": 0.609768894844329, "grad_norm": 0.7385357022285461, "learning_rate": 7.80982690241755e-06, "loss": 2.565811538696289, "step": 75540 }, { "epoch": 0.609849616170096, "grad_norm": 0.7342758178710938, "learning_rate": 7.808211432679338e-06, "loss": 2.4614799499511717, "step": 75550 }, { "epoch": 0.609930337495863, "grad_norm": 0.962923526763916, "learning_rate": 7.806595962941124e-06, "loss": 2.8625240325927734, "step": 75560 }, { "epoch": 0.6100110588216301, "grad_norm": 0.9014638662338257, "learning_rate": 7.804980493202912e-06, "loss": 2.714552879333496, "step": 75570 }, { "epoch": 0.6100917801473972, "grad_norm": 0.6470034718513489, "learning_rate": 7.803365023464698e-06, "loss": 2.627531623840332, "step": 75580 }, { "epoch": 0.6101725014731642, "grad_norm": 1.7984555959701538, "learning_rate": 7.801749553726486e-06, "loss": 3.0241653442382814, "step": 75590 }, { "epoch": 0.6102532227989312, "grad_norm": 2.8404064178466797, "learning_rate": 7.800134083988272e-06, "loss": 3.5974178314208984, "step": 75600 }, { "epoch": 0.6103339441246983, "grad_norm": 0.6505984663963318, "learning_rate": 7.79851861425006e-06, "loss": 2.4209041595458984, "step": 75610 }, { "epoch": 0.6104146654504654, "grad_norm": 1.8963844776153564, "learning_rate": 7.796903144511845e-06, "loss": 3.0747989654541015, "step": 75620 }, { "epoch": 0.6104953867762324, "grad_norm": 0.8579928874969482, "learning_rate": 7.795287674773633e-06, "loss": 2.6389743804931642, "step": 75630 }, { "epoch": 0.6105761081019995, "grad_norm": 1.4804505109786987, "learning_rate": 7.793672205035419e-06, "loss": 3.0692327499389647, "step": 75640 }, { "epoch": 0.6106568294277666, "grad_norm": 1.3047676086425781, "learning_rate": 7.792056735297207e-06, "loss": 3.224048614501953, "step": 75650 }, { "epoch": 0.6107375507535335, "grad_norm": 1.412115216255188, "learning_rate": 7.790441265558993e-06, "loss": 2.903236198425293, "step": 75660 }, { "epoch": 0.6108182720793006, "grad_norm": 1.0767836570739746, "learning_rate": 7.78882579582078e-06, "loss": 3.114372444152832, "step": 75670 }, { "epoch": 0.6108989934050677, "grad_norm": 0.7371973991394043, "learning_rate": 7.787210326082566e-06, "loss": 3.023340606689453, "step": 75680 }, { "epoch": 0.6109797147308348, "grad_norm": 1.369611144065857, "learning_rate": 7.785594856344354e-06, "loss": 3.036638641357422, "step": 75690 }, { "epoch": 0.6110604360566018, "grad_norm": 1.482298493385315, "learning_rate": 7.78397938660614e-06, "loss": 3.198257637023926, "step": 75700 }, { "epoch": 0.6111411573823688, "grad_norm": 0.7406080365180969, "learning_rate": 7.782363916867928e-06, "loss": 2.612666130065918, "step": 75710 }, { "epoch": 0.6112218787081359, "grad_norm": 0.9868334531784058, "learning_rate": 7.780748447129714e-06, "loss": 2.5275270462036135, "step": 75720 }, { "epoch": 0.6113026000339029, "grad_norm": 0.5682706236839294, "learning_rate": 7.779132977391501e-06, "loss": 2.404586410522461, "step": 75730 }, { "epoch": 0.61138332135967, "grad_norm": 1.0743423700332642, "learning_rate": 7.777517507653287e-06, "loss": 2.3889888763427733, "step": 75740 }, { "epoch": 0.6114640426854371, "grad_norm": 0.8714290261268616, "learning_rate": 7.775902037915075e-06, "loss": 2.602640724182129, "step": 75750 }, { "epoch": 0.6115447640112042, "grad_norm": 0.822614848613739, "learning_rate": 7.774286568176863e-06, "loss": 2.5101985931396484, "step": 75760 }, { "epoch": 0.6116254853369711, "grad_norm": 0.9219481945037842, "learning_rate": 7.772671098438649e-06, "loss": 2.895537567138672, "step": 75770 }, { "epoch": 0.6117062066627382, "grad_norm": 1.4895431995391846, "learning_rate": 7.771055628700437e-06, "loss": 2.813558578491211, "step": 75780 }, { "epoch": 0.6117869279885053, "grad_norm": 1.3660551309585571, "learning_rate": 7.769440158962223e-06, "loss": 3.115505599975586, "step": 75790 }, { "epoch": 0.6118676493142723, "grad_norm": 0.6192706823348999, "learning_rate": 7.76782468922401e-06, "loss": 2.5216386795043944, "step": 75800 }, { "epoch": 0.6119483706400394, "grad_norm": 0.8446713089942932, "learning_rate": 7.766209219485796e-06, "loss": 3.077385139465332, "step": 75810 }, { "epoch": 0.6120290919658065, "grad_norm": 0.9819811582565308, "learning_rate": 7.764593749747584e-06, "loss": 2.5173730850219727, "step": 75820 }, { "epoch": 0.6121098132915735, "grad_norm": 0.7850989103317261, "learning_rate": 7.76297828000937e-06, "loss": 2.6462371826171873, "step": 75830 }, { "epoch": 0.6121905346173405, "grad_norm": 0.7666721343994141, "learning_rate": 7.761362810271158e-06, "loss": 2.800586700439453, "step": 75840 }, { "epoch": 0.6122712559431076, "grad_norm": 0.9982514977455139, "learning_rate": 7.759747340532944e-06, "loss": 2.9565792083740234, "step": 75850 }, { "epoch": 0.6123519772688747, "grad_norm": 1.150025725364685, "learning_rate": 7.758131870794731e-06, "loss": 2.5399818420410156, "step": 75860 }, { "epoch": 0.6124326985946417, "grad_norm": 0.7336980700492859, "learning_rate": 7.756516401056517e-06, "loss": 2.6688421249389647, "step": 75870 }, { "epoch": 0.6125134199204088, "grad_norm": 1.7128723859786987, "learning_rate": 7.754900931318305e-06, "loss": 2.4874526977539064, "step": 75880 }, { "epoch": 0.6125941412461758, "grad_norm": 0.75752192735672, "learning_rate": 7.753285461580091e-06, "loss": 2.9889163970947266, "step": 75890 }, { "epoch": 0.6126748625719429, "grad_norm": 0.8910160064697266, "learning_rate": 7.751669991841879e-06, "loss": 2.7486753463745117, "step": 75900 }, { "epoch": 0.6127555838977099, "grad_norm": 0.8799201846122742, "learning_rate": 7.750054522103665e-06, "loss": 2.5386653900146485, "step": 75910 }, { "epoch": 0.612836305223477, "grad_norm": 1.0369380712509155, "learning_rate": 7.748439052365452e-06, "loss": 2.5784891128540037, "step": 75920 }, { "epoch": 0.6129170265492441, "grad_norm": 0.6887730956077576, "learning_rate": 7.746823582627238e-06, "loss": 2.3386043548583983, "step": 75930 }, { "epoch": 0.612997747875011, "grad_norm": 0.853111207485199, "learning_rate": 7.745208112889026e-06, "loss": 2.5197822570800783, "step": 75940 }, { "epoch": 0.6130784692007781, "grad_norm": 0.8607232570648193, "learning_rate": 7.743592643150812e-06, "loss": 3.190015983581543, "step": 75950 }, { "epoch": 0.6131591905265452, "grad_norm": 1.2793958187103271, "learning_rate": 7.7419771734126e-06, "loss": 2.2182817459106445, "step": 75960 }, { "epoch": 0.6132399118523123, "grad_norm": 0.7522256970405579, "learning_rate": 7.740361703674386e-06, "loss": 2.7626626968383787, "step": 75970 }, { "epoch": 0.6133206331780793, "grad_norm": 1.0093505382537842, "learning_rate": 7.738746233936173e-06, "loss": 2.62529296875, "step": 75980 }, { "epoch": 0.6134013545038464, "grad_norm": 0.9085893630981445, "learning_rate": 7.73713076419796e-06, "loss": 3.038936996459961, "step": 75990 }, { "epoch": 0.6134820758296134, "grad_norm": 1.1808472871780396, "learning_rate": 7.735515294459747e-06, "loss": 2.7220481872558593, "step": 76000 }, { "epoch": 0.6135627971553805, "grad_norm": 1.1474831104278564, "learning_rate": 7.733899824721533e-06, "loss": 2.8551116943359376, "step": 76010 }, { "epoch": 0.6136435184811475, "grad_norm": 0.5808185338973999, "learning_rate": 7.73228435498332e-06, "loss": 3.2891376495361326, "step": 76020 }, { "epoch": 0.6137242398069146, "grad_norm": 0.71409010887146, "learning_rate": 7.730668885245107e-06, "loss": 3.2541900634765626, "step": 76030 }, { "epoch": 0.6138049611326817, "grad_norm": 0.9260393977165222, "learning_rate": 7.729053415506895e-06, "loss": 2.616163635253906, "step": 76040 }, { "epoch": 0.6138856824584487, "grad_norm": 1.338466763496399, "learning_rate": 7.72743794576868e-06, "loss": 2.55593318939209, "step": 76050 }, { "epoch": 0.6139664037842157, "grad_norm": 0.7484163641929626, "learning_rate": 7.725822476030468e-06, "loss": 3.5359466552734373, "step": 76060 }, { "epoch": 0.6140471251099828, "grad_norm": 0.8606309294700623, "learning_rate": 7.724207006292254e-06, "loss": 2.6878780364990233, "step": 76070 }, { "epoch": 0.6141278464357499, "grad_norm": 0.8848318457603455, "learning_rate": 7.722591536554042e-06, "loss": 2.597676467895508, "step": 76080 }, { "epoch": 0.6142085677615169, "grad_norm": 0.826807975769043, "learning_rate": 7.720976066815828e-06, "loss": 2.6962480545043945, "step": 76090 }, { "epoch": 0.614289289087284, "grad_norm": 1.389387607574463, "learning_rate": 7.719360597077616e-06, "loss": 2.770038032531738, "step": 76100 }, { "epoch": 0.6143700104130511, "grad_norm": 1.1931908130645752, "learning_rate": 7.717745127339403e-06, "loss": 2.4398193359375, "step": 76110 }, { "epoch": 0.614450731738818, "grad_norm": 0.996279239654541, "learning_rate": 7.716129657601191e-06, "loss": 2.580829620361328, "step": 76120 }, { "epoch": 0.6145314530645851, "grad_norm": 0.7783726453781128, "learning_rate": 7.714514187862977e-06, "loss": 2.561431121826172, "step": 76130 }, { "epoch": 0.6146121743903522, "grad_norm": 0.7956910133361816, "learning_rate": 7.712898718124765e-06, "loss": 2.9835058212280274, "step": 76140 }, { "epoch": 0.6146928957161193, "grad_norm": 0.7712082862854004, "learning_rate": 7.71128324838655e-06, "loss": 2.7065359115600587, "step": 76150 }, { "epoch": 0.6147736170418863, "grad_norm": 1.155006766319275, "learning_rate": 7.709667778648338e-06, "loss": 2.4508628845214844, "step": 76160 }, { "epoch": 0.6148543383676534, "grad_norm": 0.7105632424354553, "learning_rate": 7.708052308910124e-06, "loss": 2.7768985748291017, "step": 76170 }, { "epoch": 0.6149350596934204, "grad_norm": 1.146873950958252, "learning_rate": 7.706436839171912e-06, "loss": 2.75264949798584, "step": 76180 }, { "epoch": 0.6150157810191874, "grad_norm": 0.9782143831253052, "learning_rate": 7.704821369433698e-06, "loss": 3.3670799255371096, "step": 76190 }, { "epoch": 0.6150965023449545, "grad_norm": 0.8445741534233093, "learning_rate": 7.703205899695486e-06, "loss": 3.1340963363647463, "step": 76200 }, { "epoch": 0.6151772236707216, "grad_norm": 0.8573333621025085, "learning_rate": 7.701590429957272e-06, "loss": 3.2670757293701174, "step": 76210 }, { "epoch": 0.6152579449964887, "grad_norm": 0.9758239984512329, "learning_rate": 7.69997496021906e-06, "loss": 2.2919942855834963, "step": 76220 }, { "epoch": 0.6153386663222556, "grad_norm": 0.8066362738609314, "learning_rate": 7.698359490480845e-06, "loss": 2.8549257278442384, "step": 76230 }, { "epoch": 0.6154193876480227, "grad_norm": 0.657774806022644, "learning_rate": 7.696744020742633e-06, "loss": 3.0027584075927733, "step": 76240 }, { "epoch": 0.6155001089737898, "grad_norm": 6.489556789398193, "learning_rate": 7.695128551004419e-06, "loss": 3.470226287841797, "step": 76250 }, { "epoch": 0.6155808302995568, "grad_norm": 0.9944958090782166, "learning_rate": 7.693513081266207e-06, "loss": 2.4417518615722655, "step": 76260 }, { "epoch": 0.6156615516253239, "grad_norm": 1.0295448303222656, "learning_rate": 7.691897611527993e-06, "loss": 3.1348445892333983, "step": 76270 }, { "epoch": 0.615742272951091, "grad_norm": 1.2143418788909912, "learning_rate": 7.69028214178978e-06, "loss": 3.495524215698242, "step": 76280 }, { "epoch": 0.615822994276858, "grad_norm": 0.7603035569190979, "learning_rate": 7.688666672051567e-06, "loss": 2.706239128112793, "step": 76290 }, { "epoch": 0.615903715602625, "grad_norm": 0.9935427904129028, "learning_rate": 7.687051202313354e-06, "loss": 2.8291927337646485, "step": 76300 }, { "epoch": 0.6159844369283921, "grad_norm": 0.8951336741447449, "learning_rate": 7.68543573257514e-06, "loss": 2.482040023803711, "step": 76310 }, { "epoch": 0.6160651582541592, "grad_norm": 0.7628271579742432, "learning_rate": 7.683820262836928e-06, "loss": 3.1144136428833007, "step": 76320 }, { "epoch": 0.6161458795799262, "grad_norm": 1.0209410190582275, "learning_rate": 7.682204793098714e-06, "loss": 2.7477439880371093, "step": 76330 }, { "epoch": 0.6162266009056933, "grad_norm": 0.8706042170524597, "learning_rate": 7.680589323360502e-06, "loss": 2.787929916381836, "step": 76340 }, { "epoch": 0.6163073222314603, "grad_norm": 1.0918887853622437, "learning_rate": 7.678973853622288e-06, "loss": 2.6206523895263674, "step": 76350 }, { "epoch": 0.6163880435572274, "grad_norm": 0.9761660099029541, "learning_rate": 7.677358383884075e-06, "loss": 2.812534713745117, "step": 76360 }, { "epoch": 0.6164687648829944, "grad_norm": 0.50898677110672, "learning_rate": 7.675742914145861e-06, "loss": 2.596450996398926, "step": 76370 }, { "epoch": 0.6165494862087615, "grad_norm": 0.8184719085693359, "learning_rate": 7.674127444407649e-06, "loss": 2.7927524566650392, "step": 76380 }, { "epoch": 0.6166302075345286, "grad_norm": 0.6032389402389526, "learning_rate": 7.672511974669435e-06, "loss": 2.624776268005371, "step": 76390 }, { "epoch": 0.6167109288602955, "grad_norm": 1.886531114578247, "learning_rate": 7.670896504931223e-06, "loss": 2.800292205810547, "step": 76400 }, { "epoch": 0.6167916501860626, "grad_norm": 0.6456955075263977, "learning_rate": 7.669281035193009e-06, "loss": 2.634215545654297, "step": 76410 }, { "epoch": 0.6168723715118297, "grad_norm": 0.7380483150482178, "learning_rate": 7.667665565454796e-06, "loss": 3.0293956756591798, "step": 76420 }, { "epoch": 0.6169530928375968, "grad_norm": 2.0070881843566895, "learning_rate": 7.666050095716582e-06, "loss": 3.3105438232421873, "step": 76430 }, { "epoch": 0.6170338141633638, "grad_norm": 2.242691993713379, "learning_rate": 7.66443462597837e-06, "loss": 3.238753890991211, "step": 76440 }, { "epoch": 0.6171145354891309, "grad_norm": 1.0939698219299316, "learning_rate": 7.662819156240156e-06, "loss": 2.848690223693848, "step": 76450 }, { "epoch": 0.617195256814898, "grad_norm": 0.8337110280990601, "learning_rate": 7.661203686501944e-06, "loss": 2.547013282775879, "step": 76460 }, { "epoch": 0.6172759781406649, "grad_norm": 1.1512935161590576, "learning_rate": 7.65958821676373e-06, "loss": 2.4589111328125, "step": 76470 }, { "epoch": 0.617356699466432, "grad_norm": 0.8108060359954834, "learning_rate": 7.657972747025517e-06, "loss": 2.4801511764526367, "step": 76480 }, { "epoch": 0.6174374207921991, "grad_norm": 0.8087893128395081, "learning_rate": 7.656357277287303e-06, "loss": 2.803700065612793, "step": 76490 }, { "epoch": 0.6175181421179662, "grad_norm": 1.7129985094070435, "learning_rate": 7.654741807549091e-06, "loss": 2.4770631790161133, "step": 76500 }, { "epoch": 0.6175988634437332, "grad_norm": 0.697281539440155, "learning_rate": 7.653126337810877e-06, "loss": 2.786591720581055, "step": 76510 }, { "epoch": 0.6176795847695002, "grad_norm": 0.7372569441795349, "learning_rate": 7.651510868072665e-06, "loss": 2.6896316528320314, "step": 76520 }, { "epoch": 0.6177603060952673, "grad_norm": 1.1142200231552124, "learning_rate": 7.64989539833445e-06, "loss": 3.2683658599853516, "step": 76530 }, { "epoch": 0.6178410274210344, "grad_norm": 2.3162596225738525, "learning_rate": 7.648279928596238e-06, "loss": 3.5363994598388673, "step": 76540 }, { "epoch": 0.6179217487468014, "grad_norm": 0.6983819603919983, "learning_rate": 7.646664458858024e-06, "loss": 3.4732986450195313, "step": 76550 }, { "epoch": 0.6180024700725685, "grad_norm": 0.667359471321106, "learning_rate": 7.645048989119812e-06, "loss": 3.283025360107422, "step": 76560 }, { "epoch": 0.6180831913983356, "grad_norm": 0.9104306697845459, "learning_rate": 7.643433519381598e-06, "loss": 2.7081676483154298, "step": 76570 }, { "epoch": 0.6181639127241025, "grad_norm": 1.0149226188659668, "learning_rate": 7.641818049643386e-06, "loss": 2.6814781188964845, "step": 76580 }, { "epoch": 0.6182446340498696, "grad_norm": 1.183409333229065, "learning_rate": 7.640202579905172e-06, "loss": 2.6088325500488283, "step": 76590 }, { "epoch": 0.6183253553756367, "grad_norm": 0.8575390577316284, "learning_rate": 7.63858711016696e-06, "loss": 2.616432952880859, "step": 76600 }, { "epoch": 0.6184060767014038, "grad_norm": 1.1819485425949097, "learning_rate": 7.636971640428746e-06, "loss": 2.2571592330932617, "step": 76610 }, { "epoch": 0.6184867980271708, "grad_norm": 0.6932855844497681, "learning_rate": 7.635356170690533e-06, "loss": 2.537070083618164, "step": 76620 }, { "epoch": 0.6185675193529379, "grad_norm": 0.8472782969474792, "learning_rate": 7.633740700952321e-06, "loss": 2.95068302154541, "step": 76630 }, { "epoch": 0.6186482406787049, "grad_norm": 0.9745725989341736, "learning_rate": 7.632125231214107e-06, "loss": 2.7821374893188477, "step": 76640 }, { "epoch": 0.6187289620044719, "grad_norm": 0.720210611820221, "learning_rate": 7.630509761475895e-06, "loss": 2.5293609619140627, "step": 76650 }, { "epoch": 0.618809683330239, "grad_norm": 0.7014342546463013, "learning_rate": 7.628894291737681e-06, "loss": 2.645980644226074, "step": 76660 }, { "epoch": 0.6188904046560061, "grad_norm": 0.9526158571243286, "learning_rate": 7.6272788219994675e-06, "loss": 2.6956539154052734, "step": 76670 }, { "epoch": 0.6189711259817732, "grad_norm": 0.8705059885978699, "learning_rate": 7.625663352261254e-06, "loss": 2.933134651184082, "step": 76680 }, { "epoch": 0.6190518473075401, "grad_norm": 0.9199719429016113, "learning_rate": 7.624047882523041e-06, "loss": 2.416523742675781, "step": 76690 }, { "epoch": 0.6191325686333072, "grad_norm": 0.9399259686470032, "learning_rate": 7.622432412784828e-06, "loss": 2.591412162780762, "step": 76700 }, { "epoch": 0.6192132899590743, "grad_norm": 0.92197185754776, "learning_rate": 7.620816943046615e-06, "loss": 2.757032012939453, "step": 76710 }, { "epoch": 0.6192940112848413, "grad_norm": 1.064924716949463, "learning_rate": 7.619201473308402e-06, "loss": 2.443999099731445, "step": 76720 }, { "epoch": 0.6193747326106084, "grad_norm": 0.6698389053344727, "learning_rate": 7.6175860035701886e-06, "loss": 2.742059326171875, "step": 76730 }, { "epoch": 0.6194554539363755, "grad_norm": 0.9840332865715027, "learning_rate": 7.615970533831975e-06, "loss": 3.201625442504883, "step": 76740 }, { "epoch": 0.6195361752621426, "grad_norm": 1.150741457939148, "learning_rate": 7.614355064093762e-06, "loss": 2.807843780517578, "step": 76750 }, { "epoch": 0.6196168965879095, "grad_norm": 0.9036865234375, "learning_rate": 7.612739594355549e-06, "loss": 2.628129005432129, "step": 76760 }, { "epoch": 0.6196976179136766, "grad_norm": 1.010705828666687, "learning_rate": 7.611124124617336e-06, "loss": 3.531620407104492, "step": 76770 }, { "epoch": 0.6197783392394437, "grad_norm": 0.8655620217323303, "learning_rate": 7.609508654879123e-06, "loss": 2.5635940551757814, "step": 76780 }, { "epoch": 0.6198590605652107, "grad_norm": 1.0441784858703613, "learning_rate": 7.60789318514091e-06, "loss": 2.6852474212646484, "step": 76790 }, { "epoch": 0.6199397818909778, "grad_norm": 0.6329461336135864, "learning_rate": 7.6062777154026965e-06, "loss": 3.3640811920166014, "step": 76800 }, { "epoch": 0.6200205032167448, "grad_norm": 1.2395132780075073, "learning_rate": 7.604662245664483e-06, "loss": 2.8596677780151367, "step": 76810 }, { "epoch": 0.6201012245425119, "grad_norm": 1.0953065156936646, "learning_rate": 7.60304677592627e-06, "loss": 2.6885236740112304, "step": 76820 }, { "epoch": 0.6201819458682789, "grad_norm": 0.8111222386360168, "learning_rate": 7.601431306188057e-06, "loss": 2.429987907409668, "step": 76830 }, { "epoch": 0.620262667194046, "grad_norm": 1.1381146907806396, "learning_rate": 7.599815836449844e-06, "loss": 2.946498489379883, "step": 76840 }, { "epoch": 0.6203433885198131, "grad_norm": 0.9579862356185913, "learning_rate": 7.598200366711631e-06, "loss": 3.0713274002075197, "step": 76850 }, { "epoch": 0.62042410984558, "grad_norm": 1.1622087955474854, "learning_rate": 7.5965848969734176e-06, "loss": 3.1653553009033204, "step": 76860 }, { "epoch": 0.6205048311713471, "grad_norm": 0.8700573444366455, "learning_rate": 7.594969427235204e-06, "loss": 2.928049659729004, "step": 76870 }, { "epoch": 0.6205855524971142, "grad_norm": 0.7031692862510681, "learning_rate": 7.593353957496991e-06, "loss": 3.1323190689086915, "step": 76880 }, { "epoch": 0.6206662738228813, "grad_norm": 0.8124890923500061, "learning_rate": 7.591738487758779e-06, "loss": 2.854241943359375, "step": 76890 }, { "epoch": 0.6207469951486483, "grad_norm": 0.8176396489143372, "learning_rate": 7.590123018020566e-06, "loss": 2.4309078216552735, "step": 76900 }, { "epoch": 0.6208277164744154, "grad_norm": 1.510936975479126, "learning_rate": 7.588507548282353e-06, "loss": 2.5707067489624023, "step": 76910 }, { "epoch": 0.6209084378001825, "grad_norm": 1.0193133354187012, "learning_rate": 7.5868920785441395e-06, "loss": 2.6823051452636717, "step": 76920 }, { "epoch": 0.6209891591259494, "grad_norm": 1.4105932712554932, "learning_rate": 7.585276608805926e-06, "loss": 2.4644113540649415, "step": 76930 }, { "epoch": 0.6210698804517165, "grad_norm": 0.8700125813484192, "learning_rate": 7.583661139067713e-06, "loss": 2.3276838302612304, "step": 76940 }, { "epoch": 0.6211506017774836, "grad_norm": 0.7661682367324829, "learning_rate": 7.5820456693295e-06, "loss": 3.309719467163086, "step": 76950 }, { "epoch": 0.6212313231032507, "grad_norm": 1.1497223377227783, "learning_rate": 7.580430199591287e-06, "loss": 2.662112808227539, "step": 76960 }, { "epoch": 0.6213120444290177, "grad_norm": 0.8989685773849487, "learning_rate": 7.578814729853074e-06, "loss": 2.2575950622558594, "step": 76970 }, { "epoch": 0.6213927657547847, "grad_norm": 1.3572005033493042, "learning_rate": 7.5771992601148605e-06, "loss": 2.7622026443481444, "step": 76980 }, { "epoch": 0.6214734870805518, "grad_norm": 0.6893504858016968, "learning_rate": 7.575583790376647e-06, "loss": 2.957347869873047, "step": 76990 }, { "epoch": 0.6215542084063189, "grad_norm": 0.7081708908081055, "learning_rate": 7.573968320638434e-06, "loss": 2.5424173355102537, "step": 77000 }, { "epoch": 0.6216349297320859, "grad_norm": 0.8744514584541321, "learning_rate": 7.572352850900221e-06, "loss": 2.6406021118164062, "step": 77010 }, { "epoch": 0.621715651057853, "grad_norm": 0.6461005806922913, "learning_rate": 7.570737381162008e-06, "loss": 2.4609491348266603, "step": 77020 }, { "epoch": 0.6217963723836201, "grad_norm": 1.0042470693588257, "learning_rate": 7.569121911423795e-06, "loss": 2.9366947174072267, "step": 77030 }, { "epoch": 0.621877093709387, "grad_norm": 0.7904133796691895, "learning_rate": 7.567506441685582e-06, "loss": 2.911727714538574, "step": 77040 }, { "epoch": 0.6219578150351541, "grad_norm": 1.1342496871948242, "learning_rate": 7.5658909719473685e-06, "loss": 2.6143503189086914, "step": 77050 }, { "epoch": 0.6220385363609212, "grad_norm": 0.6497586369514465, "learning_rate": 7.564275502209155e-06, "loss": 2.7060077667236326, "step": 77060 }, { "epoch": 0.6221192576866883, "grad_norm": 0.9549485445022583, "learning_rate": 7.562660032470942e-06, "loss": 2.843013381958008, "step": 77070 }, { "epoch": 0.6221999790124553, "grad_norm": 0.9982573390007019, "learning_rate": 7.561044562732729e-06, "loss": 2.457752799987793, "step": 77080 }, { "epoch": 0.6222807003382224, "grad_norm": 0.8660799860954285, "learning_rate": 7.559429092994516e-06, "loss": 2.68859920501709, "step": 77090 }, { "epoch": 0.6223614216639894, "grad_norm": 2.215813636779785, "learning_rate": 7.557813623256303e-06, "loss": 3.2093467712402344, "step": 77100 }, { "epoch": 0.6224421429897564, "grad_norm": 1.147997260093689, "learning_rate": 7.5561981535180895e-06, "loss": 2.3461862564086915, "step": 77110 }, { "epoch": 0.6225228643155235, "grad_norm": 1.0813713073730469, "learning_rate": 7.554582683779876e-06, "loss": 2.395160102844238, "step": 77120 }, { "epoch": 0.6226035856412906, "grad_norm": 1.3345891237258911, "learning_rate": 7.552967214041663e-06, "loss": 2.668960762023926, "step": 77130 }, { "epoch": 0.6226843069670577, "grad_norm": 1.307891607284546, "learning_rate": 7.55135174430345e-06, "loss": 3.1984832763671873, "step": 77140 }, { "epoch": 0.6227650282928247, "grad_norm": 0.5732817053794861, "learning_rate": 7.549736274565237e-06, "loss": 2.7888940811157226, "step": 77150 }, { "epoch": 0.6228457496185917, "grad_norm": 0.7036728262901306, "learning_rate": 7.548120804827024e-06, "loss": 2.7828174591064454, "step": 77160 }, { "epoch": 0.6229264709443588, "grad_norm": 1.7833986282348633, "learning_rate": 7.546505335088811e-06, "loss": 2.3434667587280273, "step": 77170 }, { "epoch": 0.6230071922701258, "grad_norm": 0.8488791584968567, "learning_rate": 7.5448898653505975e-06, "loss": 2.7855504989624023, "step": 77180 }, { "epoch": 0.6230879135958929, "grad_norm": 0.6293070316314697, "learning_rate": 7.543274395612384e-06, "loss": 2.8228004455566404, "step": 77190 }, { "epoch": 0.62316863492166, "grad_norm": 0.7961729168891907, "learning_rate": 7.541658925874171e-06, "loss": 2.744223213195801, "step": 77200 }, { "epoch": 0.623249356247427, "grad_norm": 1.4166533946990967, "learning_rate": 7.540043456135958e-06, "loss": 2.3755519866943358, "step": 77210 }, { "epoch": 0.623330077573194, "grad_norm": 1.135618805885315, "learning_rate": 7.538427986397745e-06, "loss": 2.775339889526367, "step": 77220 }, { "epoch": 0.6234107988989611, "grad_norm": 0.6433016657829285, "learning_rate": 7.536812516659532e-06, "loss": 2.9430210113525392, "step": 77230 }, { "epoch": 0.6234915202247282, "grad_norm": 1.0039852857589722, "learning_rate": 7.5351970469213185e-06, "loss": 2.44891357421875, "step": 77240 }, { "epoch": 0.6235722415504952, "grad_norm": 0.703654408454895, "learning_rate": 7.533581577183105e-06, "loss": 2.5607778549194338, "step": 77250 }, { "epoch": 0.6236529628762623, "grad_norm": 0.6961173415184021, "learning_rate": 7.531966107444892e-06, "loss": 2.7085439682006838, "step": 77260 }, { "epoch": 0.6237336842020293, "grad_norm": 1.296512484550476, "learning_rate": 7.530350637706679e-06, "loss": 2.67981071472168, "step": 77270 }, { "epoch": 0.6238144055277964, "grad_norm": 0.8641360998153687, "learning_rate": 7.528735167968466e-06, "loss": 2.680426597595215, "step": 77280 }, { "epoch": 0.6238951268535634, "grad_norm": 0.6170728802680969, "learning_rate": 7.527119698230253e-06, "loss": 2.726595878601074, "step": 77290 }, { "epoch": 0.6239758481793305, "grad_norm": 1.3175568580627441, "learning_rate": 7.52550422849204e-06, "loss": 2.760774612426758, "step": 77300 }, { "epoch": 0.6240565695050976, "grad_norm": 1.1825858354568481, "learning_rate": 7.5238887587538265e-06, "loss": 2.8684162139892577, "step": 77310 }, { "epoch": 0.6241372908308646, "grad_norm": 0.910521388053894, "learning_rate": 7.522273289015613e-06, "loss": 2.4567399978637696, "step": 77320 }, { "epoch": 0.6242180121566316, "grad_norm": 0.9300190210342407, "learning_rate": 7.5206578192774e-06, "loss": 2.6503921508789063, "step": 77330 }, { "epoch": 0.6242987334823987, "grad_norm": 0.658251166343689, "learning_rate": 7.519042349539187e-06, "loss": 2.9947832107543944, "step": 77340 }, { "epoch": 0.6243794548081658, "grad_norm": 1.2364460229873657, "learning_rate": 7.5174268798009755e-06, "loss": 2.731489372253418, "step": 77350 }, { "epoch": 0.6244601761339328, "grad_norm": 0.8141847252845764, "learning_rate": 7.515811410062762e-06, "loss": 2.728854942321777, "step": 77360 }, { "epoch": 0.6245408974596999, "grad_norm": 1.432700514793396, "learning_rate": 7.514195940324549e-06, "loss": 2.698252487182617, "step": 77370 }, { "epoch": 0.624621618785467, "grad_norm": 1.2412582635879517, "learning_rate": 7.512580470586336e-06, "loss": 2.4929292678833006, "step": 77380 }, { "epoch": 0.6247023401112339, "grad_norm": 0.6129826903343201, "learning_rate": 7.510965000848123e-06, "loss": 2.6989540100097655, "step": 77390 }, { "epoch": 0.624783061437001, "grad_norm": 0.8719902634620667, "learning_rate": 7.50934953110991e-06, "loss": 2.7082542419433593, "step": 77400 }, { "epoch": 0.6248637827627681, "grad_norm": 1.0956131219863892, "learning_rate": 7.507734061371697e-06, "loss": 2.293764114379883, "step": 77410 }, { "epoch": 0.6249445040885352, "grad_norm": 1.5409879684448242, "learning_rate": 7.5061185916334835e-06, "loss": 2.5739809036254884, "step": 77420 }, { "epoch": 0.6250252254143022, "grad_norm": 1.0276665687561035, "learning_rate": 7.50450312189527e-06, "loss": 2.669349479675293, "step": 77430 }, { "epoch": 0.6251059467400693, "grad_norm": 0.9879321455955505, "learning_rate": 7.502887652157057e-06, "loss": 3.0549266815185545, "step": 77440 }, { "epoch": 0.6251866680658363, "grad_norm": 1.1239471435546875, "learning_rate": 7.501272182418844e-06, "loss": 3.1745786666870117, "step": 77450 }, { "epoch": 0.6252673893916034, "grad_norm": 0.7452199459075928, "learning_rate": 7.499656712680631e-06, "loss": 2.937140464782715, "step": 77460 }, { "epoch": 0.6253481107173704, "grad_norm": 0.8414410948753357, "learning_rate": 7.498041242942418e-06, "loss": 2.8376550674438477, "step": 77470 }, { "epoch": 0.6254288320431375, "grad_norm": 0.9623201489448547, "learning_rate": 7.4964257732042045e-06, "loss": 2.783688545227051, "step": 77480 }, { "epoch": 0.6255095533689046, "grad_norm": 1.5649603605270386, "learning_rate": 7.494810303465991e-06, "loss": 3.443529510498047, "step": 77490 }, { "epoch": 0.6255902746946715, "grad_norm": 1.0198951959609985, "learning_rate": 7.493194833727778e-06, "loss": 2.6175868988037108, "step": 77500 }, { "epoch": 0.6256709960204386, "grad_norm": 0.6694701910018921, "learning_rate": 7.491579363989565e-06, "loss": 2.770223045349121, "step": 77510 }, { "epoch": 0.6257517173462057, "grad_norm": 0.7849498987197876, "learning_rate": 7.489963894251352e-06, "loss": 2.63122444152832, "step": 77520 }, { "epoch": 0.6258324386719728, "grad_norm": 0.8653777241706848, "learning_rate": 7.488348424513139e-06, "loss": 2.3576324462890623, "step": 77530 }, { "epoch": 0.6259131599977398, "grad_norm": 0.6553161144256592, "learning_rate": 7.486732954774926e-06, "loss": 2.498693084716797, "step": 77540 }, { "epoch": 0.6259938813235069, "grad_norm": 1.2562201023101807, "learning_rate": 7.4851174850367124e-06, "loss": 2.5646793365478517, "step": 77550 }, { "epoch": 0.626074602649274, "grad_norm": 1.1839971542358398, "learning_rate": 7.483502015298499e-06, "loss": 3.0425628662109374, "step": 77560 }, { "epoch": 0.6261553239750409, "grad_norm": 0.7849217057228088, "learning_rate": 7.481886545560286e-06, "loss": 3.1656778335571287, "step": 77570 }, { "epoch": 0.626236045300808, "grad_norm": 1.40972101688385, "learning_rate": 7.480271075822073e-06, "loss": 3.0834228515625, "step": 77580 }, { "epoch": 0.6263167666265751, "grad_norm": 1.331087589263916, "learning_rate": 7.47865560608386e-06, "loss": 2.5421260833740233, "step": 77590 }, { "epoch": 0.6263974879523422, "grad_norm": 1.2429709434509277, "learning_rate": 7.477040136345647e-06, "loss": 2.48177433013916, "step": 77600 }, { "epoch": 0.6264782092781092, "grad_norm": 0.9963348507881165, "learning_rate": 7.4754246666074335e-06, "loss": 2.423717498779297, "step": 77610 }, { "epoch": 0.6265589306038762, "grad_norm": 0.9641526937484741, "learning_rate": 7.47380919686922e-06, "loss": 2.8911699295043944, "step": 77620 }, { "epoch": 0.6266396519296433, "grad_norm": 0.7834506034851074, "learning_rate": 7.472193727131007e-06, "loss": 2.7891740798950195, "step": 77630 }, { "epoch": 0.6267203732554103, "grad_norm": 1.02788245677948, "learning_rate": 7.470578257392794e-06, "loss": 2.8346466064453124, "step": 77640 }, { "epoch": 0.6268010945811774, "grad_norm": 0.58420729637146, "learning_rate": 7.468962787654581e-06, "loss": 3.4245887756347657, "step": 77650 }, { "epoch": 0.6268818159069445, "grad_norm": 0.578020453453064, "learning_rate": 7.467347317916368e-06, "loss": 2.916088104248047, "step": 77660 }, { "epoch": 0.6269625372327116, "grad_norm": 0.8338156342506409, "learning_rate": 7.465731848178155e-06, "loss": 2.521200180053711, "step": 77670 }, { "epoch": 0.6270432585584785, "grad_norm": 0.651125431060791, "learning_rate": 7.4641163784399414e-06, "loss": 2.47872314453125, "step": 77680 }, { "epoch": 0.6271239798842456, "grad_norm": 0.6045582890510559, "learning_rate": 7.462500908701728e-06, "loss": 2.351384162902832, "step": 77690 }, { "epoch": 0.6272047012100127, "grad_norm": 0.8364422917366028, "learning_rate": 7.460885438963515e-06, "loss": 2.6751071929931642, "step": 77700 }, { "epoch": 0.6272854225357797, "grad_norm": 0.652485191822052, "learning_rate": 7.459269969225302e-06, "loss": 2.7030609130859373, "step": 77710 }, { "epoch": 0.6273661438615468, "grad_norm": 1.1398241519927979, "learning_rate": 7.457654499487089e-06, "loss": 2.918894958496094, "step": 77720 }, { "epoch": 0.6274468651873139, "grad_norm": 0.6194514036178589, "learning_rate": 7.456039029748876e-06, "loss": 2.5364145278930663, "step": 77730 }, { "epoch": 0.6275275865130809, "grad_norm": 0.7653778791427612, "learning_rate": 7.4544235600106625e-06, "loss": 2.4393280029296873, "step": 77740 }, { "epoch": 0.6276083078388479, "grad_norm": 0.9959791898727417, "learning_rate": 7.452808090272449e-06, "loss": 2.8846900939941404, "step": 77750 }, { "epoch": 0.627689029164615, "grad_norm": 0.9218968152999878, "learning_rate": 7.451192620534237e-06, "loss": 2.999647521972656, "step": 77760 }, { "epoch": 0.6277697504903821, "grad_norm": 0.7085057497024536, "learning_rate": 7.449577150796024e-06, "loss": 2.525369071960449, "step": 77770 }, { "epoch": 0.6278504718161491, "grad_norm": 0.9340075254440308, "learning_rate": 7.447961681057811e-06, "loss": 3.1095531463623045, "step": 77780 }, { "epoch": 0.6279311931419161, "grad_norm": 1.712511658668518, "learning_rate": 7.446346211319598e-06, "loss": 2.7982906341552733, "step": 77790 }, { "epoch": 0.6280119144676832, "grad_norm": 1.384581446647644, "learning_rate": 7.4447307415813844e-06, "loss": 2.395760345458984, "step": 77800 }, { "epoch": 0.6280926357934503, "grad_norm": 0.9621772766113281, "learning_rate": 7.443115271843171e-06, "loss": 3.1973772048950195, "step": 77810 }, { "epoch": 0.6281733571192173, "grad_norm": 0.8592096567153931, "learning_rate": 7.441499802104958e-06, "loss": 3.1787979125976564, "step": 77820 }, { "epoch": 0.6282540784449844, "grad_norm": 1.1587060689926147, "learning_rate": 7.439884332366745e-06, "loss": 2.996653938293457, "step": 77830 }, { "epoch": 0.6283347997707515, "grad_norm": 0.7451887130737305, "learning_rate": 7.438268862628532e-06, "loss": 2.9136322021484373, "step": 77840 }, { "epoch": 0.6284155210965184, "grad_norm": 0.8830728530883789, "learning_rate": 7.436653392890319e-06, "loss": 2.6915563583374023, "step": 77850 }, { "epoch": 0.6284962424222855, "grad_norm": 0.5624575614929199, "learning_rate": 7.4350379231521055e-06, "loss": 2.512604904174805, "step": 77860 }, { "epoch": 0.6285769637480526, "grad_norm": 1.1256135702133179, "learning_rate": 7.433422453413892e-06, "loss": 2.9819746017456055, "step": 77870 }, { "epoch": 0.6286576850738197, "grad_norm": 0.9543457627296448, "learning_rate": 7.431806983675679e-06, "loss": 2.705801773071289, "step": 77880 }, { "epoch": 0.6287384063995867, "grad_norm": 1.059303879737854, "learning_rate": 7.430191513937466e-06, "loss": 2.789738655090332, "step": 77890 }, { "epoch": 0.6288191277253538, "grad_norm": 0.7766150832176208, "learning_rate": 7.428576044199253e-06, "loss": 3.0058956146240234, "step": 77900 }, { "epoch": 0.6288998490511208, "grad_norm": 0.9036785960197449, "learning_rate": 7.42696057446104e-06, "loss": 2.706836128234863, "step": 77910 }, { "epoch": 0.6289805703768878, "grad_norm": 0.9883001446723938, "learning_rate": 7.425345104722827e-06, "loss": 2.5884372711181642, "step": 77920 }, { "epoch": 0.6290612917026549, "grad_norm": 0.9451298117637634, "learning_rate": 7.4237296349846134e-06, "loss": 2.6479461669921873, "step": 77930 }, { "epoch": 0.629142013028422, "grad_norm": 0.6335777044296265, "learning_rate": 7.4221141652464e-06, "loss": 2.7163808822631834, "step": 77940 }, { "epoch": 0.6292227343541891, "grad_norm": 1.3087043762207031, "learning_rate": 7.420498695508187e-06, "loss": 2.6821157455444338, "step": 77950 }, { "epoch": 0.629303455679956, "grad_norm": 0.9721425771713257, "learning_rate": 7.418883225769974e-06, "loss": 2.549961471557617, "step": 77960 }, { "epoch": 0.6293841770057231, "grad_norm": 2.6314198970794678, "learning_rate": 7.417267756031761e-06, "loss": 2.649661827087402, "step": 77970 }, { "epoch": 0.6294648983314902, "grad_norm": 0.8279659152030945, "learning_rate": 7.415652286293548e-06, "loss": 2.988882064819336, "step": 77980 }, { "epoch": 0.6295456196572573, "grad_norm": 0.9483687281608582, "learning_rate": 7.4140368165553345e-06, "loss": 2.6145734786987305, "step": 77990 }, { "epoch": 0.6296263409830243, "grad_norm": 0.89234858751297, "learning_rate": 7.412421346817121e-06, "loss": 2.5432676315307616, "step": 78000 }, { "epoch": 0.6297070623087914, "grad_norm": 0.814303457736969, "learning_rate": 7.410805877078908e-06, "loss": 2.913314437866211, "step": 78010 }, { "epoch": 0.6297877836345585, "grad_norm": 1.3165239095687866, "learning_rate": 7.409190407340695e-06, "loss": 3.1387248992919923, "step": 78020 }, { "epoch": 0.6298685049603254, "grad_norm": 1.1073150634765625, "learning_rate": 7.407574937602482e-06, "loss": 2.592066764831543, "step": 78030 }, { "epoch": 0.6299492262860925, "grad_norm": 0.9860485792160034, "learning_rate": 7.405959467864269e-06, "loss": 2.570248603820801, "step": 78040 }, { "epoch": 0.6300299476118596, "grad_norm": 1.613558053970337, "learning_rate": 7.404343998126056e-06, "loss": 2.4530588150024415, "step": 78050 }, { "epoch": 0.6301106689376267, "grad_norm": 1.1698194742202759, "learning_rate": 7.4027285283878424e-06, "loss": 2.5793533325195312, "step": 78060 }, { "epoch": 0.6301913902633937, "grad_norm": 1.9898885488510132, "learning_rate": 7.401113058649629e-06, "loss": 2.4077714920043944, "step": 78070 }, { "epoch": 0.6302721115891607, "grad_norm": 0.7854229807853699, "learning_rate": 7.399497588911416e-06, "loss": 2.386849021911621, "step": 78080 }, { "epoch": 0.6303528329149278, "grad_norm": 0.5874007940292358, "learning_rate": 7.397882119173203e-06, "loss": 2.9186445236206056, "step": 78090 }, { "epoch": 0.6304335542406948, "grad_norm": 1.1114851236343384, "learning_rate": 7.39626664943499e-06, "loss": 2.9315515518188477, "step": 78100 }, { "epoch": 0.6305142755664619, "grad_norm": 1.1140801906585693, "learning_rate": 7.394651179696777e-06, "loss": 2.8751552581787108, "step": 78110 }, { "epoch": 0.630594996892229, "grad_norm": 0.730010986328125, "learning_rate": 7.3930357099585635e-06, "loss": 2.9063982009887694, "step": 78120 }, { "epoch": 0.6306757182179961, "grad_norm": 1.0402189493179321, "learning_rate": 7.39142024022035e-06, "loss": 2.6708826065063476, "step": 78130 }, { "epoch": 0.630756439543763, "grad_norm": 1.6374467611312866, "learning_rate": 7.389804770482137e-06, "loss": 3.8030296325683595, "step": 78140 }, { "epoch": 0.6308371608695301, "grad_norm": 0.8125905394554138, "learning_rate": 7.388189300743924e-06, "loss": 2.442226791381836, "step": 78150 }, { "epoch": 0.6309178821952972, "grad_norm": 0.780143678188324, "learning_rate": 7.386573831005711e-06, "loss": 2.887992095947266, "step": 78160 }, { "epoch": 0.6309986035210642, "grad_norm": 1.4087167978286743, "learning_rate": 7.384958361267498e-06, "loss": 2.6188920974731444, "step": 78170 }, { "epoch": 0.6310793248468313, "grad_norm": 0.9020468592643738, "learning_rate": 7.3833428915292846e-06, "loss": 2.447006416320801, "step": 78180 }, { "epoch": 0.6311600461725984, "grad_norm": 1.0250202417373657, "learning_rate": 7.381727421791071e-06, "loss": 2.7002685546875, "step": 78190 }, { "epoch": 0.6312407674983654, "grad_norm": 0.6994464993476868, "learning_rate": 7.380111952052858e-06, "loss": 2.4016347885131837, "step": 78200 }, { "epoch": 0.6313214888241324, "grad_norm": 0.8778663277626038, "learning_rate": 7.378496482314645e-06, "loss": 2.930669403076172, "step": 78210 }, { "epoch": 0.6314022101498995, "grad_norm": 0.8758931756019592, "learning_rate": 7.376881012576432e-06, "loss": 2.6611778259277346, "step": 78220 }, { "epoch": 0.6314829314756666, "grad_norm": 1.2153329849243164, "learning_rate": 7.375265542838219e-06, "loss": 2.4912441253662108, "step": 78230 }, { "epoch": 0.6315636528014336, "grad_norm": 1.0791445970535278, "learning_rate": 7.373650073100006e-06, "loss": 2.6182939529418947, "step": 78240 }, { "epoch": 0.6316443741272006, "grad_norm": 1.3766937255859375, "learning_rate": 7.3720346033617925e-06, "loss": 2.4775585174560546, "step": 78250 }, { "epoch": 0.6317250954529677, "grad_norm": 1.1546709537506104, "learning_rate": 7.370419133623579e-06, "loss": 2.8422782897949217, "step": 78260 }, { "epoch": 0.6318058167787348, "grad_norm": 1.3743048906326294, "learning_rate": 7.368803663885367e-06, "loss": 3.035368537902832, "step": 78270 }, { "epoch": 0.6318865381045018, "grad_norm": 0.7305985689163208, "learning_rate": 7.367188194147154e-06, "loss": 2.463290786743164, "step": 78280 }, { "epoch": 0.6319672594302689, "grad_norm": 0.920222818851471, "learning_rate": 7.365572724408941e-06, "loss": 2.451527976989746, "step": 78290 }, { "epoch": 0.632047980756036, "grad_norm": 0.8585730791091919, "learning_rate": 7.3639572546707276e-06, "loss": 2.8722087860107424, "step": 78300 }, { "epoch": 0.6321287020818029, "grad_norm": 0.7674512267112732, "learning_rate": 7.362341784932514e-06, "loss": 2.727194404602051, "step": 78310 }, { "epoch": 0.63220942340757, "grad_norm": 0.834774911403656, "learning_rate": 7.360726315194301e-06, "loss": 2.849906349182129, "step": 78320 }, { "epoch": 0.6322901447333371, "grad_norm": 0.898373544216156, "learning_rate": 7.359110845456088e-06, "loss": 2.7668447494506836, "step": 78330 }, { "epoch": 0.6323708660591042, "grad_norm": 0.9029568433761597, "learning_rate": 7.357495375717875e-06, "loss": 2.612181854248047, "step": 78340 }, { "epoch": 0.6324515873848712, "grad_norm": 0.8938952684402466, "learning_rate": 7.355879905979662e-06, "loss": 3.1210958480834963, "step": 78350 }, { "epoch": 0.6325323087106383, "grad_norm": 1.4168347120285034, "learning_rate": 7.354264436241449e-06, "loss": 2.6663869857788085, "step": 78360 }, { "epoch": 0.6326130300364053, "grad_norm": 0.7229980826377869, "learning_rate": 7.3526489665032355e-06, "loss": 2.700644874572754, "step": 78370 }, { "epoch": 0.6326937513621723, "grad_norm": 1.0010862350463867, "learning_rate": 7.351033496765022e-06, "loss": 2.789924430847168, "step": 78380 }, { "epoch": 0.6327744726879394, "grad_norm": 0.9060460925102234, "learning_rate": 7.349418027026809e-06, "loss": 3.0080419540405274, "step": 78390 }, { "epoch": 0.6328551940137065, "grad_norm": 0.8245677947998047, "learning_rate": 7.347802557288596e-06, "loss": 2.367650604248047, "step": 78400 }, { "epoch": 0.6329359153394736, "grad_norm": 0.76076740026474, "learning_rate": 7.346187087550383e-06, "loss": 2.3693923950195312, "step": 78410 }, { "epoch": 0.6330166366652406, "grad_norm": 0.9171373248100281, "learning_rate": 7.34457161781217e-06, "loss": 2.736185646057129, "step": 78420 }, { "epoch": 0.6330973579910076, "grad_norm": 0.6732308864593506, "learning_rate": 7.3429561480739566e-06, "loss": 3.1532709121704103, "step": 78430 }, { "epoch": 0.6331780793167747, "grad_norm": 0.9251576066017151, "learning_rate": 7.341340678335743e-06, "loss": 2.887212562561035, "step": 78440 }, { "epoch": 0.6332588006425418, "grad_norm": 1.7788441181182861, "learning_rate": 7.33972520859753e-06, "loss": 3.000604248046875, "step": 78450 }, { "epoch": 0.6333395219683088, "grad_norm": 1.0243810415267944, "learning_rate": 7.338109738859317e-06, "loss": 2.7206892013549804, "step": 78460 }, { "epoch": 0.6334202432940759, "grad_norm": 1.1310003995895386, "learning_rate": 7.336494269121104e-06, "loss": 2.535214424133301, "step": 78470 }, { "epoch": 0.633500964619843, "grad_norm": 0.6668398976325989, "learning_rate": 7.334878799382891e-06, "loss": 2.5643056869506835, "step": 78480 }, { "epoch": 0.6335816859456099, "grad_norm": 0.7960103154182434, "learning_rate": 7.333263329644678e-06, "loss": 2.48099308013916, "step": 78490 }, { "epoch": 0.633662407271377, "grad_norm": 0.7097199559211731, "learning_rate": 7.3316478599064645e-06, "loss": 3.042316436767578, "step": 78500 }, { "epoch": 0.6337431285971441, "grad_norm": 0.8516260981559753, "learning_rate": 7.330032390168251e-06, "loss": 2.4449155807495115, "step": 78510 }, { "epoch": 0.6338238499229112, "grad_norm": 0.8172461986541748, "learning_rate": 7.328416920430038e-06, "loss": 2.8006669998168947, "step": 78520 }, { "epoch": 0.6339045712486782, "grad_norm": 1.5077730417251587, "learning_rate": 7.326801450691825e-06, "loss": 2.8639686584472654, "step": 78530 }, { "epoch": 0.6339852925744452, "grad_norm": 0.9731290340423584, "learning_rate": 7.325185980953612e-06, "loss": 2.851437568664551, "step": 78540 }, { "epoch": 0.6340660139002123, "grad_norm": 0.9417163729667664, "learning_rate": 7.323570511215399e-06, "loss": 2.414925384521484, "step": 78550 }, { "epoch": 0.6341467352259793, "grad_norm": 1.1370856761932373, "learning_rate": 7.3219550414771856e-06, "loss": 2.8260488510131836, "step": 78560 }, { "epoch": 0.6342274565517464, "grad_norm": 0.9153181314468384, "learning_rate": 7.320339571738972e-06, "loss": 2.489810752868652, "step": 78570 }, { "epoch": 0.6343081778775135, "grad_norm": 0.6210336089134216, "learning_rate": 7.318724102000759e-06, "loss": 2.897946357727051, "step": 78580 }, { "epoch": 0.6343888992032806, "grad_norm": 1.0649974346160889, "learning_rate": 7.317108632262546e-06, "loss": 2.6180774688720705, "step": 78590 }, { "epoch": 0.6344696205290475, "grad_norm": 0.7412816882133484, "learning_rate": 7.315493162524333e-06, "loss": 2.790304946899414, "step": 78600 }, { "epoch": 0.6345503418548146, "grad_norm": 0.7696977257728577, "learning_rate": 7.313877692786121e-06, "loss": 2.6113712310791017, "step": 78610 }, { "epoch": 0.6346310631805817, "grad_norm": 0.8936256766319275, "learning_rate": 7.3122622230479075e-06, "loss": 2.629792022705078, "step": 78620 }, { "epoch": 0.6347117845063487, "grad_norm": 1.0423567295074463, "learning_rate": 7.310646753309695e-06, "loss": 2.487052345275879, "step": 78630 }, { "epoch": 0.6347925058321158, "grad_norm": 0.8114693760871887, "learning_rate": 7.309031283571482e-06, "loss": 2.3791759490966795, "step": 78640 }, { "epoch": 0.6348732271578829, "grad_norm": 1.1682299375534058, "learning_rate": 7.307415813833269e-06, "loss": 2.572001075744629, "step": 78650 }, { "epoch": 0.6349539484836499, "grad_norm": 0.5290349721908569, "learning_rate": 7.305800344095056e-06, "loss": 2.674294662475586, "step": 78660 }, { "epoch": 0.6350346698094169, "grad_norm": 0.7934591174125671, "learning_rate": 7.3041848743568426e-06, "loss": 2.647698974609375, "step": 78670 }, { "epoch": 0.635115391135184, "grad_norm": 6.255163669586182, "learning_rate": 7.302569404618629e-06, "loss": 3.0794950485229493, "step": 78680 }, { "epoch": 0.6351961124609511, "grad_norm": 0.5367504358291626, "learning_rate": 7.300953934880416e-06, "loss": 2.5766883850097657, "step": 78690 }, { "epoch": 0.6352768337867181, "grad_norm": 1.1661940813064575, "learning_rate": 7.299338465142203e-06, "loss": 2.5204498291015627, "step": 78700 }, { "epoch": 0.6353575551124852, "grad_norm": 1.1444884538650513, "learning_rate": 7.29772299540399e-06, "loss": 2.5639320373535157, "step": 78710 }, { "epoch": 0.6354382764382522, "grad_norm": 0.8101010322570801, "learning_rate": 7.296107525665777e-06, "loss": 2.6155603408813475, "step": 78720 }, { "epoch": 0.6355189977640193, "grad_norm": 1.1106207370758057, "learning_rate": 7.294492055927564e-06, "loss": 3.1838863372802733, "step": 78730 }, { "epoch": 0.6355997190897863, "grad_norm": 0.863629937171936, "learning_rate": 7.2928765861893505e-06, "loss": 2.9469348907470705, "step": 78740 }, { "epoch": 0.6356804404155534, "grad_norm": 0.8072428703308105, "learning_rate": 7.291261116451137e-06, "loss": 2.403166389465332, "step": 78750 }, { "epoch": 0.6357611617413205, "grad_norm": 0.6603407859802246, "learning_rate": 7.289645646712924e-06, "loss": 2.511123466491699, "step": 78760 }, { "epoch": 0.6358418830670874, "grad_norm": 1.2400678396224976, "learning_rate": 7.288030176974711e-06, "loss": 2.5222700119018553, "step": 78770 }, { "epoch": 0.6359226043928545, "grad_norm": 1.7701443433761597, "learning_rate": 7.286414707236498e-06, "loss": 2.5202558517456053, "step": 78780 }, { "epoch": 0.6360033257186216, "grad_norm": 0.8410449624061584, "learning_rate": 7.284799237498285e-06, "loss": 2.8164377212524414, "step": 78790 }, { "epoch": 0.6360840470443887, "grad_norm": 0.5101267099380493, "learning_rate": 7.2831837677600715e-06, "loss": 2.7604085922241213, "step": 78800 }, { "epoch": 0.6361647683701557, "grad_norm": 0.684634268283844, "learning_rate": 7.281568298021858e-06, "loss": 2.683582878112793, "step": 78810 }, { "epoch": 0.6362454896959228, "grad_norm": 0.9263836741447449, "learning_rate": 7.279952828283645e-06, "loss": 2.966082000732422, "step": 78820 }, { "epoch": 0.6363262110216898, "grad_norm": 1.00545072555542, "learning_rate": 7.278337358545432e-06, "loss": 2.6600902557373045, "step": 78830 }, { "epoch": 0.6364069323474568, "grad_norm": 0.723422646522522, "learning_rate": 7.276721888807219e-06, "loss": 2.2772804260253907, "step": 78840 }, { "epoch": 0.6364876536732239, "grad_norm": 0.8198635578155518, "learning_rate": 7.275106419069006e-06, "loss": 2.878268814086914, "step": 78850 }, { "epoch": 0.636568374998991, "grad_norm": 0.7942833304405212, "learning_rate": 7.273490949330793e-06, "loss": 2.643166351318359, "step": 78860 }, { "epoch": 0.6366490963247581, "grad_norm": 1.2930548191070557, "learning_rate": 7.2718754795925795e-06, "loss": 2.970025062561035, "step": 78870 }, { "epoch": 0.636729817650525, "grad_norm": 1.2028372287750244, "learning_rate": 7.270260009854366e-06, "loss": 2.5688545227050783, "step": 78880 }, { "epoch": 0.6368105389762921, "grad_norm": 1.4414503574371338, "learning_rate": 7.268644540116153e-06, "loss": 2.5217485427856445, "step": 78890 }, { "epoch": 0.6368912603020592, "grad_norm": 0.8509148955345154, "learning_rate": 7.26702907037794e-06, "loss": 2.685660552978516, "step": 78900 }, { "epoch": 0.6369719816278262, "grad_norm": 0.628675103187561, "learning_rate": 7.265413600639727e-06, "loss": 2.9916011810302736, "step": 78910 }, { "epoch": 0.6370527029535933, "grad_norm": 0.5954420566558838, "learning_rate": 7.263798130901514e-06, "loss": 3.2799350738525392, "step": 78920 }, { "epoch": 0.6371334242793604, "grad_norm": 1.487982988357544, "learning_rate": 7.2621826611633005e-06, "loss": 2.515020751953125, "step": 78930 }, { "epoch": 0.6372141456051275, "grad_norm": 0.5637498497962952, "learning_rate": 7.260567191425087e-06, "loss": 2.9329147338867188, "step": 78940 }, { "epoch": 0.6372948669308944, "grad_norm": 0.8231009840965271, "learning_rate": 7.258951721686874e-06, "loss": 2.673369789123535, "step": 78950 }, { "epoch": 0.6373755882566615, "grad_norm": 1.2966965436935425, "learning_rate": 7.257336251948661e-06, "loss": 2.5525529861450194, "step": 78960 }, { "epoch": 0.6374563095824286, "grad_norm": 1.114242434501648, "learning_rate": 7.255720782210448e-06, "loss": 3.4459609985351562, "step": 78970 }, { "epoch": 0.6375370309081957, "grad_norm": 1.0460443496704102, "learning_rate": 7.254105312472235e-06, "loss": 3.1273681640625, "step": 78980 }, { "epoch": 0.6376177522339627, "grad_norm": 1.4129294157028198, "learning_rate": 7.252489842734022e-06, "loss": 2.600905418395996, "step": 78990 }, { "epoch": 0.6376984735597298, "grad_norm": 1.0350579023361206, "learning_rate": 7.2508743729958085e-06, "loss": 2.548430252075195, "step": 79000 }, { "epoch": 0.6377791948854968, "grad_norm": 0.5724892020225525, "learning_rate": 7.249258903257595e-06, "loss": 2.4987058639526367, "step": 79010 }, { "epoch": 0.6378599162112638, "grad_norm": 0.8880521655082703, "learning_rate": 7.247643433519382e-06, "loss": 2.9110654830932616, "step": 79020 }, { "epoch": 0.6379406375370309, "grad_norm": 0.9979333281517029, "learning_rate": 7.246027963781169e-06, "loss": 3.031107711791992, "step": 79030 }, { "epoch": 0.638021358862798, "grad_norm": 1.0678390264511108, "learning_rate": 7.244412494042956e-06, "loss": 2.996020698547363, "step": 79040 }, { "epoch": 0.6381020801885651, "grad_norm": 0.6909837126731873, "learning_rate": 7.242797024304743e-06, "loss": 3.121195602416992, "step": 79050 }, { "epoch": 0.638182801514332, "grad_norm": 0.809501051902771, "learning_rate": 7.2411815545665295e-06, "loss": 2.6724523544311523, "step": 79060 }, { "epoch": 0.6382635228400991, "grad_norm": 1.1324183940887451, "learning_rate": 7.239566084828316e-06, "loss": 2.6775007247924805, "step": 79070 }, { "epoch": 0.6383442441658662, "grad_norm": 1.033555269241333, "learning_rate": 7.237950615090103e-06, "loss": 2.4733131408691404, "step": 79080 }, { "epoch": 0.6384249654916332, "grad_norm": 1.1235713958740234, "learning_rate": 7.23633514535189e-06, "loss": 2.8126689910888674, "step": 79090 }, { "epoch": 0.6385056868174003, "grad_norm": 1.0832123756408691, "learning_rate": 7.234719675613677e-06, "loss": 2.421977996826172, "step": 79100 }, { "epoch": 0.6385864081431674, "grad_norm": 1.1682963371276855, "learning_rate": 7.233104205875464e-06, "loss": 2.7550458908081055, "step": 79110 }, { "epoch": 0.6386671294689344, "grad_norm": 0.7809674143791199, "learning_rate": 7.231488736137251e-06, "loss": 2.629428672790527, "step": 79120 }, { "epoch": 0.6387478507947014, "grad_norm": 0.8114601373672485, "learning_rate": 7.2298732663990375e-06, "loss": 2.5269460678100586, "step": 79130 }, { "epoch": 0.6388285721204685, "grad_norm": 0.9305824637413025, "learning_rate": 7.228257796660824e-06, "loss": 2.263453483581543, "step": 79140 }, { "epoch": 0.6389092934462356, "grad_norm": 1.4290882349014282, "learning_rate": 7.226642326922612e-06, "loss": 2.9736101150512697, "step": 79150 }, { "epoch": 0.6389900147720026, "grad_norm": 1.003208875656128, "learning_rate": 7.225026857184399e-06, "loss": 2.636829376220703, "step": 79160 }, { "epoch": 0.6390707360977697, "grad_norm": 0.7808073163032532, "learning_rate": 7.223411387446186e-06, "loss": 2.6654180526733398, "step": 79170 }, { "epoch": 0.6391514574235367, "grad_norm": 1.6415331363677979, "learning_rate": 7.2217959177079725e-06, "loss": 2.847531318664551, "step": 79180 }, { "epoch": 0.6392321787493038, "grad_norm": 0.7742087244987488, "learning_rate": 7.220180447969759e-06, "loss": 2.8111446380615233, "step": 79190 }, { "epoch": 0.6393129000750708, "grad_norm": 1.0255231857299805, "learning_rate": 7.218564978231546e-06, "loss": 2.5454534530639648, "step": 79200 }, { "epoch": 0.6393936214008379, "grad_norm": 0.9819392561912537, "learning_rate": 7.216949508493333e-06, "loss": 3.280051422119141, "step": 79210 }, { "epoch": 0.639474342726605, "grad_norm": 0.679625928401947, "learning_rate": 7.21533403875512e-06, "loss": 2.8066186904907227, "step": 79220 }, { "epoch": 0.639555064052372, "grad_norm": 0.6408745050430298, "learning_rate": 7.213718569016907e-06, "loss": 2.61025390625, "step": 79230 }, { "epoch": 0.639635785378139, "grad_norm": 0.8990703821182251, "learning_rate": 7.212103099278694e-06, "loss": 2.7221269607543945, "step": 79240 }, { "epoch": 0.6397165067039061, "grad_norm": 1.1292285919189453, "learning_rate": 7.2104876295404805e-06, "loss": 2.4314905166625977, "step": 79250 }, { "epoch": 0.6397972280296732, "grad_norm": 1.270087718963623, "learning_rate": 7.208872159802267e-06, "loss": 3.0524974822998048, "step": 79260 }, { "epoch": 0.6398779493554402, "grad_norm": 0.8620503544807434, "learning_rate": 7.207256690064054e-06, "loss": 2.8874975204467774, "step": 79270 }, { "epoch": 0.6399586706812073, "grad_norm": 1.2029484510421753, "learning_rate": 7.205641220325841e-06, "loss": 2.676081657409668, "step": 79280 }, { "epoch": 0.6400393920069744, "grad_norm": 1.0756093263626099, "learning_rate": 7.204025750587628e-06, "loss": 2.6949838638305663, "step": 79290 }, { "epoch": 0.6401201133327413, "grad_norm": 1.1645513772964478, "learning_rate": 7.202410280849415e-06, "loss": 2.5627296447753904, "step": 79300 }, { "epoch": 0.6402008346585084, "grad_norm": 1.3255107402801514, "learning_rate": 7.2007948111112015e-06, "loss": 2.6986625671386717, "step": 79310 }, { "epoch": 0.6402815559842755, "grad_norm": 0.9654014110565186, "learning_rate": 7.199179341372988e-06, "loss": 2.6032527923583983, "step": 79320 }, { "epoch": 0.6403622773100426, "grad_norm": 1.2646899223327637, "learning_rate": 7.197563871634775e-06, "loss": 3.0471208572387694, "step": 79330 }, { "epoch": 0.6404429986358096, "grad_norm": 0.9900116324424744, "learning_rate": 7.195948401896562e-06, "loss": 2.759373092651367, "step": 79340 }, { "epoch": 0.6405237199615766, "grad_norm": 1.2736583948135376, "learning_rate": 7.194332932158349e-06, "loss": 2.6685003280639648, "step": 79350 }, { "epoch": 0.6406044412873437, "grad_norm": 0.6397157907485962, "learning_rate": 7.192717462420136e-06, "loss": 2.457688331604004, "step": 79360 }, { "epoch": 0.6406851626131107, "grad_norm": 0.7484222054481506, "learning_rate": 7.191101992681923e-06, "loss": 3.1956762313842773, "step": 79370 }, { "epoch": 0.6407658839388778, "grad_norm": 1.6268385648727417, "learning_rate": 7.1894865229437094e-06, "loss": 2.9622852325439455, "step": 79380 }, { "epoch": 0.6408466052646449, "grad_norm": 1.1540682315826416, "learning_rate": 7.187871053205496e-06, "loss": 2.697296905517578, "step": 79390 }, { "epoch": 0.640927326590412, "grad_norm": 1.3309434652328491, "learning_rate": 7.186255583467283e-06, "loss": 3.2606170654296873, "step": 79400 }, { "epoch": 0.6410080479161789, "grad_norm": 0.9095059037208557, "learning_rate": 7.18464011372907e-06, "loss": 2.524079704284668, "step": 79410 }, { "epoch": 0.641088769241946, "grad_norm": 1.6209564208984375, "learning_rate": 7.183024643990857e-06, "loss": 2.920182228088379, "step": 79420 }, { "epoch": 0.6411694905677131, "grad_norm": 1.4650623798370361, "learning_rate": 7.181409174252644e-06, "loss": 2.646210861206055, "step": 79430 }, { "epoch": 0.6412502118934802, "grad_norm": 1.7767679691314697, "learning_rate": 7.1797937045144305e-06, "loss": 2.9245983123779298, "step": 79440 }, { "epoch": 0.6413309332192472, "grad_norm": 0.8009535670280457, "learning_rate": 7.178178234776217e-06, "loss": 2.5965904235839843, "step": 79450 }, { "epoch": 0.6414116545450143, "grad_norm": 1.032547950744629, "learning_rate": 7.176562765038004e-06, "loss": 2.39119930267334, "step": 79460 }, { "epoch": 0.6414923758707813, "grad_norm": 1.8012428283691406, "learning_rate": 7.174947295299791e-06, "loss": 3.002483367919922, "step": 79470 }, { "epoch": 0.6415730971965483, "grad_norm": 0.7947415113449097, "learning_rate": 7.173331825561578e-06, "loss": 2.5858497619628906, "step": 79480 }, { "epoch": 0.6416538185223154, "grad_norm": 1.0636425018310547, "learning_rate": 7.171716355823365e-06, "loss": 2.887190246582031, "step": 79490 }, { "epoch": 0.6417345398480825, "grad_norm": 2.1002793312072754, "learning_rate": 7.170100886085152e-06, "loss": 2.970549201965332, "step": 79500 }, { "epoch": 0.6418152611738496, "grad_norm": 1.3660186529159546, "learning_rate": 7.1684854163469384e-06, "loss": 2.7526912689208984, "step": 79510 }, { "epoch": 0.6418959824996165, "grad_norm": 0.6249540448188782, "learning_rate": 7.166869946608725e-06, "loss": 2.5608638763427733, "step": 79520 }, { "epoch": 0.6419767038253836, "grad_norm": 0.8707249760627747, "learning_rate": 7.165254476870512e-06, "loss": 2.519794464111328, "step": 79530 }, { "epoch": 0.6420574251511507, "grad_norm": 0.8956209421157837, "learning_rate": 7.163639007132299e-06, "loss": 3.0259424209594727, "step": 79540 }, { "epoch": 0.6421381464769177, "grad_norm": 1.2711718082427979, "learning_rate": 7.162023537394086e-06, "loss": 2.6987798690795897, "step": 79550 }, { "epoch": 0.6422188678026848, "grad_norm": 0.8210591673851013, "learning_rate": 7.160408067655873e-06, "loss": 2.2563348770141602, "step": 79560 }, { "epoch": 0.6422995891284519, "grad_norm": 0.8028132319450378, "learning_rate": 7.1587925979176595e-06, "loss": 2.618153953552246, "step": 79570 }, { "epoch": 0.642380310454219, "grad_norm": 0.7462582588195801, "learning_rate": 7.157177128179446e-06, "loss": 2.7792732238769533, "step": 79580 }, { "epoch": 0.6424610317799859, "grad_norm": 1.1584784984588623, "learning_rate": 7.155561658441233e-06, "loss": 2.7396568298339843, "step": 79590 }, { "epoch": 0.642541753105753, "grad_norm": 0.6628514528274536, "learning_rate": 7.15394618870302e-06, "loss": 2.9427661895751953, "step": 79600 }, { "epoch": 0.6426224744315201, "grad_norm": 1.0521180629730225, "learning_rate": 7.152330718964807e-06, "loss": 2.6352602005004884, "step": 79610 }, { "epoch": 0.6427031957572871, "grad_norm": 1.2439645528793335, "learning_rate": 7.150715249226594e-06, "loss": 2.547908973693848, "step": 79620 }, { "epoch": 0.6427839170830542, "grad_norm": 0.895904004573822, "learning_rate": 7.149099779488381e-06, "loss": 2.5792861938476563, "step": 79630 }, { "epoch": 0.6428646384088212, "grad_norm": 1.2129955291748047, "learning_rate": 7.1474843097501674e-06, "loss": 2.8769216537475586, "step": 79640 }, { "epoch": 0.6429453597345883, "grad_norm": 0.8163716197013855, "learning_rate": 7.145868840011954e-06, "loss": 2.8730262756347655, "step": 79650 }, { "epoch": 0.6430260810603553, "grad_norm": 0.8870051503181458, "learning_rate": 7.144253370273742e-06, "loss": 2.36038761138916, "step": 79660 }, { "epoch": 0.6431068023861224, "grad_norm": 0.6767573356628418, "learning_rate": 7.142637900535529e-06, "loss": 2.7672903060913088, "step": 79670 }, { "epoch": 0.6431875237118895, "grad_norm": 0.5775416493415833, "learning_rate": 7.141022430797316e-06, "loss": 2.7922845840454102, "step": 79680 }, { "epoch": 0.6432682450376564, "grad_norm": 0.9847665429115295, "learning_rate": 7.1394069610591025e-06, "loss": 2.335875701904297, "step": 79690 }, { "epoch": 0.6433489663634235, "grad_norm": 0.8621008396148682, "learning_rate": 7.137791491320889e-06, "loss": 2.9451086044311525, "step": 79700 }, { "epoch": 0.6434296876891906, "grad_norm": 0.6202684640884399, "learning_rate": 7.136176021582676e-06, "loss": 2.908928108215332, "step": 79710 }, { "epoch": 0.6435104090149577, "grad_norm": 1.0930639505386353, "learning_rate": 7.134560551844463e-06, "loss": 2.4663856506347654, "step": 79720 }, { "epoch": 0.6435911303407247, "grad_norm": 0.7512004971504211, "learning_rate": 7.13294508210625e-06, "loss": 2.5347415924072267, "step": 79730 }, { "epoch": 0.6436718516664918, "grad_norm": 0.6266021132469177, "learning_rate": 7.131329612368037e-06, "loss": 2.504949378967285, "step": 79740 }, { "epoch": 0.6437525729922589, "grad_norm": 1.6789907217025757, "learning_rate": 7.129714142629824e-06, "loss": 2.5796613693237305, "step": 79750 }, { "epoch": 0.6438332943180258, "grad_norm": 1.078126072883606, "learning_rate": 7.1280986728916104e-06, "loss": 2.8797521591186523, "step": 79760 }, { "epoch": 0.6439140156437929, "grad_norm": 0.6571500301361084, "learning_rate": 7.126483203153397e-06, "loss": 2.8038795471191404, "step": 79770 }, { "epoch": 0.64399473696956, "grad_norm": 0.7180591821670532, "learning_rate": 7.124867733415184e-06, "loss": 2.7172340393066405, "step": 79780 }, { "epoch": 0.6440754582953271, "grad_norm": 1.055384635925293, "learning_rate": 7.123252263676971e-06, "loss": 3.195298767089844, "step": 79790 }, { "epoch": 0.6441561796210941, "grad_norm": 1.0941452980041504, "learning_rate": 7.121636793938758e-06, "loss": 2.9169342041015627, "step": 79800 }, { "epoch": 0.6442369009468611, "grad_norm": 1.006473422050476, "learning_rate": 7.120021324200545e-06, "loss": 2.580955696105957, "step": 79810 }, { "epoch": 0.6443176222726282, "grad_norm": 1.5856127738952637, "learning_rate": 7.1184058544623315e-06, "loss": 2.823969268798828, "step": 79820 }, { "epoch": 0.6443983435983952, "grad_norm": 0.8429692387580872, "learning_rate": 7.116790384724118e-06, "loss": 3.0346046447753907, "step": 79830 }, { "epoch": 0.6444790649241623, "grad_norm": 0.9238036870956421, "learning_rate": 7.115174914985905e-06, "loss": 2.3925382614135744, "step": 79840 }, { "epoch": 0.6445597862499294, "grad_norm": 0.9830622673034668, "learning_rate": 7.113559445247692e-06, "loss": 2.7187089920043945, "step": 79850 }, { "epoch": 0.6446405075756965, "grad_norm": 1.2427316904067993, "learning_rate": 7.11194397550948e-06, "loss": 2.3354618072509767, "step": 79860 }, { "epoch": 0.6447212289014634, "grad_norm": 0.6024761199951172, "learning_rate": 7.110328505771267e-06, "loss": 2.753507614135742, "step": 79870 }, { "epoch": 0.6448019502272305, "grad_norm": 1.4349719285964966, "learning_rate": 7.1087130360330534e-06, "loss": 2.955292510986328, "step": 79880 }, { "epoch": 0.6448826715529976, "grad_norm": 0.7754011750221252, "learning_rate": 7.10709756629484e-06, "loss": 3.056570053100586, "step": 79890 }, { "epoch": 0.6449633928787646, "grad_norm": 0.5218403339385986, "learning_rate": 7.105482096556627e-06, "loss": 2.6999885559082033, "step": 79900 }, { "epoch": 0.6450441142045317, "grad_norm": 0.6807119846343994, "learning_rate": 7.103866626818414e-06, "loss": 2.325654220581055, "step": 79910 }, { "epoch": 0.6451248355302988, "grad_norm": 0.8894354104995728, "learning_rate": 7.102251157080201e-06, "loss": 2.8541351318359376, "step": 79920 }, { "epoch": 0.6452055568560658, "grad_norm": 0.8252537250518799, "learning_rate": 7.100635687341988e-06, "loss": 3.181910514831543, "step": 79930 }, { "epoch": 0.6452862781818328, "grad_norm": 0.9562362432479858, "learning_rate": 7.0990202176037745e-06, "loss": 2.9626697540283202, "step": 79940 }, { "epoch": 0.6453669995075999, "grad_norm": 0.6514262557029724, "learning_rate": 7.097404747865561e-06, "loss": 2.275962066650391, "step": 79950 }, { "epoch": 0.645447720833367, "grad_norm": 1.0632498264312744, "learning_rate": 7.095789278127348e-06, "loss": 2.765062713623047, "step": 79960 }, { "epoch": 0.6455284421591341, "grad_norm": 0.8790798783302307, "learning_rate": 7.094173808389135e-06, "loss": 2.372883415222168, "step": 79970 }, { "epoch": 0.645609163484901, "grad_norm": 0.7090761661529541, "learning_rate": 7.092558338650922e-06, "loss": 2.7878139495849608, "step": 79980 }, { "epoch": 0.6456898848106681, "grad_norm": 1.5785555839538574, "learning_rate": 7.090942868912709e-06, "loss": 2.5897186279296873, "step": 79990 }, { "epoch": 0.6457706061364352, "grad_norm": 0.9012588262557983, "learning_rate": 7.0893273991744956e-06, "loss": 3.3518009185791016, "step": 80000 }, { "epoch": 0.6458513274622022, "grad_norm": 1.0744009017944336, "learning_rate": 7.087711929436282e-06, "loss": 2.790229606628418, "step": 80010 }, { "epoch": 0.6459320487879693, "grad_norm": 1.2969857454299927, "learning_rate": 7.08609645969807e-06, "loss": 2.752802276611328, "step": 80020 }, { "epoch": 0.6460127701137364, "grad_norm": 2.17771053314209, "learning_rate": 7.084480989959857e-06, "loss": 2.8586605072021483, "step": 80030 }, { "epoch": 0.6460934914395035, "grad_norm": 1.1883610486984253, "learning_rate": 7.082865520221644e-06, "loss": 2.6096654891967774, "step": 80040 }, { "epoch": 0.6461742127652704, "grad_norm": 0.9248318672180176, "learning_rate": 7.081250050483431e-06, "loss": 2.4803081512451173, "step": 80050 }, { "epoch": 0.6462549340910375, "grad_norm": 0.9835373163223267, "learning_rate": 7.0796345807452175e-06, "loss": 3.5883541107177734, "step": 80060 }, { "epoch": 0.6463356554168046, "grad_norm": 1.0028760433197021, "learning_rate": 7.078019111007004e-06, "loss": 2.7718950271606446, "step": 80070 }, { "epoch": 0.6464163767425716, "grad_norm": 1.3138927221298218, "learning_rate": 7.076403641268791e-06, "loss": 3.2434337615966795, "step": 80080 }, { "epoch": 0.6464970980683387, "grad_norm": 0.8464497327804565, "learning_rate": 7.074788171530578e-06, "loss": 2.8869655609130858, "step": 80090 }, { "epoch": 0.6465778193941057, "grad_norm": 0.9221368432044983, "learning_rate": 7.073172701792365e-06, "loss": 2.4249895095825194, "step": 80100 }, { "epoch": 0.6466585407198728, "grad_norm": 0.9600338935852051, "learning_rate": 7.071557232054152e-06, "loss": 2.3115562438964843, "step": 80110 }, { "epoch": 0.6467392620456398, "grad_norm": 0.6971051692962646, "learning_rate": 7.0699417623159386e-06, "loss": 2.5890552520751955, "step": 80120 }, { "epoch": 0.6468199833714069, "grad_norm": 0.7818065285682678, "learning_rate": 7.068326292577725e-06, "loss": 2.368218994140625, "step": 80130 }, { "epoch": 0.646900704697174, "grad_norm": 1.1158806085586548, "learning_rate": 7.066710822839512e-06, "loss": 2.839207649230957, "step": 80140 }, { "epoch": 0.646981426022941, "grad_norm": 0.6118952631950378, "learning_rate": 7.065095353101299e-06, "loss": 2.9689895629882814, "step": 80150 }, { "epoch": 0.647062147348708, "grad_norm": 0.8725705742835999, "learning_rate": 7.063479883363086e-06, "loss": 2.7902225494384765, "step": 80160 }, { "epoch": 0.6471428686744751, "grad_norm": 1.0679309368133545, "learning_rate": 7.061864413624873e-06, "loss": 3.100557327270508, "step": 80170 }, { "epoch": 0.6472235900002422, "grad_norm": 1.4451143741607666, "learning_rate": 7.06024894388666e-06, "loss": 2.8187210083007814, "step": 80180 }, { "epoch": 0.6473043113260092, "grad_norm": 0.8069572448730469, "learning_rate": 7.0586334741484465e-06, "loss": 2.3667861938476564, "step": 80190 }, { "epoch": 0.6473850326517763, "grad_norm": 1.1182990074157715, "learning_rate": 7.057018004410233e-06, "loss": 2.9908273696899412, "step": 80200 }, { "epoch": 0.6474657539775434, "grad_norm": 0.8190679550170898, "learning_rate": 7.05540253467202e-06, "loss": 2.589138221740723, "step": 80210 }, { "epoch": 0.6475464753033103, "grad_norm": 0.6331313252449036, "learning_rate": 7.053787064933807e-06, "loss": 3.0060302734375, "step": 80220 }, { "epoch": 0.6476271966290774, "grad_norm": 0.6454750895500183, "learning_rate": 7.052171595195594e-06, "loss": 2.4617282867431642, "step": 80230 }, { "epoch": 0.6477079179548445, "grad_norm": 0.826347827911377, "learning_rate": 7.050556125457381e-06, "loss": 2.6025625228881837, "step": 80240 }, { "epoch": 0.6477886392806116, "grad_norm": 0.6485280990600586, "learning_rate": 7.0489406557191676e-06, "loss": 3.223307800292969, "step": 80250 }, { "epoch": 0.6478693606063786, "grad_norm": 1.0999398231506348, "learning_rate": 7.047325185980954e-06, "loss": 2.1888599395751953, "step": 80260 }, { "epoch": 0.6479500819321456, "grad_norm": 0.8961489796638489, "learning_rate": 7.045709716242741e-06, "loss": 3.4388118743896485, "step": 80270 }, { "epoch": 0.6480308032579127, "grad_norm": 1.0827451944351196, "learning_rate": 7.044094246504528e-06, "loss": 2.711155891418457, "step": 80280 }, { "epoch": 0.6481115245836797, "grad_norm": 0.7303338646888733, "learning_rate": 7.042478776766315e-06, "loss": 2.6855709075927736, "step": 80290 }, { "epoch": 0.6481922459094468, "grad_norm": 0.8413524031639099, "learning_rate": 7.040863307028102e-06, "loss": 2.723245620727539, "step": 80300 }, { "epoch": 0.6482729672352139, "grad_norm": 0.9627522826194763, "learning_rate": 7.039247837289889e-06, "loss": 2.838251304626465, "step": 80310 }, { "epoch": 0.648353688560981, "grad_norm": 1.6736112833023071, "learning_rate": 7.0376323675516755e-06, "loss": 2.7554180145263674, "step": 80320 }, { "epoch": 0.6484344098867479, "grad_norm": 1.0786446332931519, "learning_rate": 7.036016897813462e-06, "loss": 2.707330322265625, "step": 80330 }, { "epoch": 0.648515131212515, "grad_norm": 0.6909009218215942, "learning_rate": 7.034401428075249e-06, "loss": 2.681945037841797, "step": 80340 }, { "epoch": 0.6485958525382821, "grad_norm": 0.7357211112976074, "learning_rate": 7.032785958337036e-06, "loss": 2.692257308959961, "step": 80350 }, { "epoch": 0.6486765738640491, "grad_norm": 0.8627281785011292, "learning_rate": 7.031170488598823e-06, "loss": 2.635818672180176, "step": 80360 }, { "epoch": 0.6487572951898162, "grad_norm": 0.88861483335495, "learning_rate": 7.02955501886061e-06, "loss": 2.536867141723633, "step": 80370 }, { "epoch": 0.6488380165155833, "grad_norm": 1.2154955863952637, "learning_rate": 7.0279395491223966e-06, "loss": 2.7632736206054687, "step": 80380 }, { "epoch": 0.6489187378413503, "grad_norm": 0.8843140602111816, "learning_rate": 7.026324079384183e-06, "loss": 2.3244094848632812, "step": 80390 }, { "epoch": 0.6489994591671173, "grad_norm": 0.9226324558258057, "learning_rate": 7.02470860964597e-06, "loss": 3.005389595031738, "step": 80400 }, { "epoch": 0.6490801804928844, "grad_norm": 0.9153398275375366, "learning_rate": 7.023093139907757e-06, "loss": 2.5667205810546876, "step": 80410 }, { "epoch": 0.6491609018186515, "grad_norm": 1.1672176122665405, "learning_rate": 7.021477670169544e-06, "loss": 2.8229082107543944, "step": 80420 }, { "epoch": 0.6492416231444186, "grad_norm": 0.8114640116691589, "learning_rate": 7.019862200431331e-06, "loss": 2.753119468688965, "step": 80430 }, { "epoch": 0.6493223444701856, "grad_norm": 1.3059711456298828, "learning_rate": 7.018246730693118e-06, "loss": 2.816822052001953, "step": 80440 }, { "epoch": 0.6494030657959526, "grad_norm": 0.8567185997962952, "learning_rate": 7.0166312609549045e-06, "loss": 2.486929512023926, "step": 80450 }, { "epoch": 0.6494837871217197, "grad_norm": 1.1986736059188843, "learning_rate": 7.015015791216691e-06, "loss": 2.2079387664794923, "step": 80460 }, { "epoch": 0.6495645084474867, "grad_norm": 0.6312341690063477, "learning_rate": 7.013400321478478e-06, "loss": 2.653631591796875, "step": 80470 }, { "epoch": 0.6496452297732538, "grad_norm": 0.9550114274024963, "learning_rate": 7.011784851740265e-06, "loss": 2.3691389083862306, "step": 80480 }, { "epoch": 0.6497259510990209, "grad_norm": 1.1582878828048706, "learning_rate": 7.010169382002052e-06, "loss": 2.7070512771606445, "step": 80490 }, { "epoch": 0.649806672424788, "grad_norm": 0.9420998096466064, "learning_rate": 7.008553912263839e-06, "loss": 2.7273569107055664, "step": 80500 }, { "epoch": 0.6498873937505549, "grad_norm": 0.885684072971344, "learning_rate": 7.0069384425256255e-06, "loss": 2.4820417404174804, "step": 80510 }, { "epoch": 0.649968115076322, "grad_norm": 0.8424134254455566, "learning_rate": 7.005322972787412e-06, "loss": 2.644272232055664, "step": 80520 }, { "epoch": 0.6500488364020891, "grad_norm": 1.0834472179412842, "learning_rate": 7.003707503049199e-06, "loss": 2.408443260192871, "step": 80530 }, { "epoch": 0.6501295577278561, "grad_norm": 0.930380642414093, "learning_rate": 7.002092033310987e-06, "loss": 2.162184715270996, "step": 80540 }, { "epoch": 0.6502102790536232, "grad_norm": 0.8877745270729065, "learning_rate": 7.000476563572774e-06, "loss": 2.658127784729004, "step": 80550 }, { "epoch": 0.6502910003793902, "grad_norm": 1.5247186422348022, "learning_rate": 6.998861093834561e-06, "loss": 2.4250362396240233, "step": 80560 }, { "epoch": 0.6503717217051573, "grad_norm": 1.0385961532592773, "learning_rate": 6.9972456240963475e-06, "loss": 2.747074508666992, "step": 80570 }, { "epoch": 0.6504524430309243, "grad_norm": 0.9079479575157166, "learning_rate": 6.995630154358134e-06, "loss": 3.377829360961914, "step": 80580 }, { "epoch": 0.6505331643566914, "grad_norm": 0.8159359097480774, "learning_rate": 6.994014684619921e-06, "loss": 2.7744749069213865, "step": 80590 }, { "epoch": 0.6506138856824585, "grad_norm": 1.2670323848724365, "learning_rate": 6.992399214881708e-06, "loss": 2.933418083190918, "step": 80600 }, { "epoch": 0.6506946070082255, "grad_norm": 1.1777280569076538, "learning_rate": 6.990783745143495e-06, "loss": 2.799901008605957, "step": 80610 }, { "epoch": 0.6507753283339925, "grad_norm": 1.1028172969818115, "learning_rate": 6.989168275405282e-06, "loss": 2.6721729278564452, "step": 80620 }, { "epoch": 0.6508560496597596, "grad_norm": 0.9660033583641052, "learning_rate": 6.9875528056670685e-06, "loss": 2.1134664535522463, "step": 80630 }, { "epoch": 0.6509367709855267, "grad_norm": 0.845605731010437, "learning_rate": 6.985937335928855e-06, "loss": 2.473479652404785, "step": 80640 }, { "epoch": 0.6510174923112937, "grad_norm": 0.9997915625572205, "learning_rate": 6.984321866190642e-06, "loss": 2.8445159912109377, "step": 80650 }, { "epoch": 0.6510982136370608, "grad_norm": 0.8653085827827454, "learning_rate": 6.982706396452429e-06, "loss": 2.5905834197998048, "step": 80660 }, { "epoch": 0.6511789349628279, "grad_norm": 0.8136032223701477, "learning_rate": 6.981090926714216e-06, "loss": 2.993632507324219, "step": 80670 }, { "epoch": 0.6512596562885948, "grad_norm": 1.4738171100616455, "learning_rate": 6.979475456976003e-06, "loss": 2.524518585205078, "step": 80680 }, { "epoch": 0.6513403776143619, "grad_norm": 1.632475733757019, "learning_rate": 6.97785998723779e-06, "loss": 3.2113662719726563, "step": 80690 }, { "epoch": 0.651421098940129, "grad_norm": 0.9917019605636597, "learning_rate": 6.9762445174995765e-06, "loss": 2.539699745178223, "step": 80700 }, { "epoch": 0.6515018202658961, "grad_norm": 1.5904386043548584, "learning_rate": 6.974629047761363e-06, "loss": 2.5758544921875, "step": 80710 }, { "epoch": 0.6515825415916631, "grad_norm": 0.9763338565826416, "learning_rate": 6.97301357802315e-06, "loss": 2.7589176177978514, "step": 80720 }, { "epoch": 0.6516632629174302, "grad_norm": 0.7269776463508606, "learning_rate": 6.971398108284937e-06, "loss": 2.611642837524414, "step": 80730 }, { "epoch": 0.6517439842431972, "grad_norm": 0.8127608299255371, "learning_rate": 6.969782638546724e-06, "loss": 2.884556007385254, "step": 80740 }, { "epoch": 0.6518247055689642, "grad_norm": 1.0591256618499756, "learning_rate": 6.968167168808511e-06, "loss": 3.0698827743530273, "step": 80750 }, { "epoch": 0.6519054268947313, "grad_norm": 0.9277523159980774, "learning_rate": 6.9665516990702975e-06, "loss": 2.70227108001709, "step": 80760 }, { "epoch": 0.6519861482204984, "grad_norm": 0.781603217124939, "learning_rate": 6.964936229332084e-06, "loss": 2.589457321166992, "step": 80770 }, { "epoch": 0.6520668695462655, "grad_norm": 1.206650972366333, "learning_rate": 6.963320759593871e-06, "loss": 2.3755550384521484, "step": 80780 }, { "epoch": 0.6521475908720324, "grad_norm": 0.9137416481971741, "learning_rate": 6.961705289855658e-06, "loss": 2.852933883666992, "step": 80790 }, { "epoch": 0.6522283121977995, "grad_norm": 0.7885671257972717, "learning_rate": 6.960089820117445e-06, "loss": 3.1206308364868165, "step": 80800 }, { "epoch": 0.6523090335235666, "grad_norm": 0.7810404896736145, "learning_rate": 6.958474350379232e-06, "loss": 2.5770685195922853, "step": 80810 }, { "epoch": 0.6523897548493336, "grad_norm": 1.1820429563522339, "learning_rate": 6.956858880641019e-06, "loss": 2.508777046203613, "step": 80820 }, { "epoch": 0.6524704761751007, "grad_norm": 1.093231201171875, "learning_rate": 6.9552434109028055e-06, "loss": 2.4228918075561525, "step": 80830 }, { "epoch": 0.6525511975008678, "grad_norm": 0.8310055136680603, "learning_rate": 6.953627941164592e-06, "loss": 2.6212690353393553, "step": 80840 }, { "epoch": 0.6526319188266348, "grad_norm": 0.7281630635261536, "learning_rate": 6.952012471426379e-06, "loss": 2.687663269042969, "step": 80850 }, { "epoch": 0.6527126401524018, "grad_norm": 0.699638843536377, "learning_rate": 6.950397001688166e-06, "loss": 3.026067543029785, "step": 80860 }, { "epoch": 0.6527933614781689, "grad_norm": 0.8916162252426147, "learning_rate": 6.948781531949953e-06, "loss": 2.9285915374755858, "step": 80870 }, { "epoch": 0.652874082803936, "grad_norm": 1.5246576070785522, "learning_rate": 6.94716606221174e-06, "loss": 3.0050222396850588, "step": 80880 }, { "epoch": 0.6529548041297031, "grad_norm": 1.0163923501968384, "learning_rate": 6.9455505924735265e-06, "loss": 2.801197624206543, "step": 80890 }, { "epoch": 0.6530355254554701, "grad_norm": 0.9669601321220398, "learning_rate": 6.943935122735313e-06, "loss": 2.425563621520996, "step": 80900 }, { "epoch": 0.6531162467812371, "grad_norm": 0.7318858504295349, "learning_rate": 6.9423196529971e-06, "loss": 2.8539215087890626, "step": 80910 }, { "epoch": 0.6531969681070042, "grad_norm": 1.6749986410140991, "learning_rate": 6.940704183258887e-06, "loss": 2.776455879211426, "step": 80920 }, { "epoch": 0.6532776894327712, "grad_norm": 1.259447693824768, "learning_rate": 6.939088713520674e-06, "loss": 3.0619670867919924, "step": 80930 }, { "epoch": 0.6533584107585383, "grad_norm": 1.165441870689392, "learning_rate": 6.937473243782461e-06, "loss": 2.3987361907958986, "step": 80940 }, { "epoch": 0.6534391320843054, "grad_norm": 1.5174126625061035, "learning_rate": 6.935857774044248e-06, "loss": 2.432417106628418, "step": 80950 }, { "epoch": 0.6535198534100725, "grad_norm": 0.7632701396942139, "learning_rate": 6.9342423043060345e-06, "loss": 2.9658910751342775, "step": 80960 }, { "epoch": 0.6536005747358394, "grad_norm": 0.8149229884147644, "learning_rate": 6.932626834567821e-06, "loss": 3.149898147583008, "step": 80970 }, { "epoch": 0.6536812960616065, "grad_norm": 1.833765983581543, "learning_rate": 6.931011364829608e-06, "loss": 3.2230449676513673, "step": 80980 }, { "epoch": 0.6537620173873736, "grad_norm": 0.9556238651275635, "learning_rate": 6.929395895091395e-06, "loss": 2.503933906555176, "step": 80990 }, { "epoch": 0.6538427387131406, "grad_norm": 0.9148478507995605, "learning_rate": 6.927780425353182e-06, "loss": 2.805731010437012, "step": 81000 }, { "epoch": 0.6539234600389077, "grad_norm": 1.3208982944488525, "learning_rate": 6.926164955614969e-06, "loss": 2.926186752319336, "step": 81010 }, { "epoch": 0.6540041813646748, "grad_norm": 0.9232689738273621, "learning_rate": 6.9245494858767555e-06, "loss": 2.809945487976074, "step": 81020 }, { "epoch": 0.6540849026904418, "grad_norm": 1.4824634790420532, "learning_rate": 6.922934016138542e-06, "loss": 2.4452646255493162, "step": 81030 }, { "epoch": 0.6541656240162088, "grad_norm": 1.3979041576385498, "learning_rate": 6.921318546400329e-06, "loss": 2.874517822265625, "step": 81040 }, { "epoch": 0.6542463453419759, "grad_norm": 0.4915398359298706, "learning_rate": 6.919703076662117e-06, "loss": 2.7573509216308594, "step": 81050 }, { "epoch": 0.654327066667743, "grad_norm": 1.4065264463424683, "learning_rate": 6.918087606923904e-06, "loss": 2.695931243896484, "step": 81060 }, { "epoch": 0.65440778799351, "grad_norm": 0.7217715382575989, "learning_rate": 6.916472137185691e-06, "loss": 3.5294281005859376, "step": 81070 }, { "epoch": 0.654488509319277, "grad_norm": 0.7463968396186829, "learning_rate": 6.9148566674474774e-06, "loss": 2.642464447021484, "step": 81080 }, { "epoch": 0.6545692306450441, "grad_norm": 1.2157871723175049, "learning_rate": 6.913241197709264e-06, "loss": 2.8049222946166994, "step": 81090 }, { "epoch": 0.6546499519708112, "grad_norm": 1.0777380466461182, "learning_rate": 6.911625727971051e-06, "loss": 2.7923702239990233, "step": 81100 }, { "epoch": 0.6547306732965782, "grad_norm": 0.8439286351203918, "learning_rate": 6.910010258232838e-06, "loss": 2.5815250396728517, "step": 81110 }, { "epoch": 0.6548113946223453, "grad_norm": 0.612426221370697, "learning_rate": 6.908394788494626e-06, "loss": 2.6562196731567385, "step": 81120 }, { "epoch": 0.6548921159481124, "grad_norm": 1.098140835762024, "learning_rate": 6.9067793187564125e-06, "loss": 2.830231475830078, "step": 81130 }, { "epoch": 0.6549728372738793, "grad_norm": 0.782280445098877, "learning_rate": 6.905163849018199e-06, "loss": 2.633309173583984, "step": 81140 }, { "epoch": 0.6550535585996464, "grad_norm": 0.9130580425262451, "learning_rate": 6.903548379279986e-06, "loss": 2.582338333129883, "step": 81150 }, { "epoch": 0.6551342799254135, "grad_norm": 0.5227498412132263, "learning_rate": 6.901932909541773e-06, "loss": 2.8401256561279298, "step": 81160 }, { "epoch": 0.6552150012511806, "grad_norm": 0.8548941612243652, "learning_rate": 6.90031743980356e-06, "loss": 2.756144142150879, "step": 81170 }, { "epoch": 0.6552957225769476, "grad_norm": 1.1348003149032593, "learning_rate": 6.898701970065347e-06, "loss": 2.9120826721191406, "step": 81180 }, { "epoch": 0.6553764439027147, "grad_norm": 0.9208127856254578, "learning_rate": 6.897086500327134e-06, "loss": 2.537843704223633, "step": 81190 }, { "epoch": 0.6554571652284817, "grad_norm": 1.2277307510375977, "learning_rate": 6.8954710305889204e-06, "loss": 2.7152612686157225, "step": 81200 }, { "epoch": 0.6555378865542487, "grad_norm": 1.0108349323272705, "learning_rate": 6.893855560850707e-06, "loss": 2.4875940322875976, "step": 81210 }, { "epoch": 0.6556186078800158, "grad_norm": 0.8389356732368469, "learning_rate": 6.892240091112494e-06, "loss": 2.594722938537598, "step": 81220 }, { "epoch": 0.6556993292057829, "grad_norm": 1.313554048538208, "learning_rate": 6.890624621374281e-06, "loss": 2.7135202407836916, "step": 81230 }, { "epoch": 0.65578005053155, "grad_norm": 0.7565245032310486, "learning_rate": 6.889009151636068e-06, "loss": 2.3887712478637697, "step": 81240 }, { "epoch": 0.655860771857317, "grad_norm": 0.6961471438407898, "learning_rate": 6.887393681897855e-06, "loss": 2.9447132110595704, "step": 81250 }, { "epoch": 0.655941493183084, "grad_norm": 0.9045742154121399, "learning_rate": 6.8857782121596415e-06, "loss": 2.6203378677368163, "step": 81260 }, { "epoch": 0.6560222145088511, "grad_norm": 0.8148747682571411, "learning_rate": 6.884162742421428e-06, "loss": 3.192367935180664, "step": 81270 }, { "epoch": 0.6561029358346181, "grad_norm": 0.5790435671806335, "learning_rate": 6.882547272683215e-06, "loss": 2.986600875854492, "step": 81280 }, { "epoch": 0.6561836571603852, "grad_norm": 0.5712273120880127, "learning_rate": 6.880931802945002e-06, "loss": 2.7427013397216795, "step": 81290 }, { "epoch": 0.6562643784861523, "grad_norm": 0.8594319224357605, "learning_rate": 6.879316333206789e-06, "loss": 2.9483028411865235, "step": 81300 }, { "epoch": 0.6563450998119194, "grad_norm": 0.8296020030975342, "learning_rate": 6.877700863468576e-06, "loss": 2.931669998168945, "step": 81310 }, { "epoch": 0.6564258211376863, "grad_norm": 0.8497762680053711, "learning_rate": 6.876085393730363e-06, "loss": 2.8434566497802733, "step": 81320 }, { "epoch": 0.6565065424634534, "grad_norm": 1.0386157035827637, "learning_rate": 6.8744699239921494e-06, "loss": 2.9255767822265626, "step": 81330 }, { "epoch": 0.6565872637892205, "grad_norm": 0.8083868622779846, "learning_rate": 6.872854454253936e-06, "loss": 2.446506690979004, "step": 81340 }, { "epoch": 0.6566679851149875, "grad_norm": 1.0086750984191895, "learning_rate": 6.871238984515723e-06, "loss": 2.913224983215332, "step": 81350 }, { "epoch": 0.6567487064407546, "grad_norm": 1.4652433395385742, "learning_rate": 6.86962351477751e-06, "loss": 2.7408672332763673, "step": 81360 }, { "epoch": 0.6568294277665216, "grad_norm": 0.6708475351333618, "learning_rate": 6.868008045039297e-06, "loss": 2.706415557861328, "step": 81370 }, { "epoch": 0.6569101490922887, "grad_norm": 0.8462691903114319, "learning_rate": 6.866392575301084e-06, "loss": 2.386297035217285, "step": 81380 }, { "epoch": 0.6569908704180557, "grad_norm": 0.9686924815177917, "learning_rate": 6.8647771055628705e-06, "loss": 3.057703399658203, "step": 81390 }, { "epoch": 0.6570715917438228, "grad_norm": 1.0149801969528198, "learning_rate": 6.863161635824657e-06, "loss": 2.9543956756591796, "step": 81400 }, { "epoch": 0.6571523130695899, "grad_norm": 1.3684935569763184, "learning_rate": 6.861546166086445e-06, "loss": 2.7658966064453123, "step": 81410 }, { "epoch": 0.657233034395357, "grad_norm": 0.6512994766235352, "learning_rate": 6.859930696348232e-06, "loss": 3.305149459838867, "step": 81420 }, { "epoch": 0.6573137557211239, "grad_norm": 0.8699813485145569, "learning_rate": 6.858315226610019e-06, "loss": 2.8327573776245116, "step": 81430 }, { "epoch": 0.657394477046891, "grad_norm": 1.5402181148529053, "learning_rate": 6.856699756871806e-06, "loss": 2.9354364395141603, "step": 81440 }, { "epoch": 0.6574751983726581, "grad_norm": 0.8814566135406494, "learning_rate": 6.8550842871335924e-06, "loss": 2.964737319946289, "step": 81450 }, { "epoch": 0.6575559196984251, "grad_norm": 1.490235447883606, "learning_rate": 6.853468817395379e-06, "loss": 2.9212467193603517, "step": 81460 }, { "epoch": 0.6576366410241922, "grad_norm": 0.9488519430160522, "learning_rate": 6.851853347657166e-06, "loss": 2.9849843978881836, "step": 81470 }, { "epoch": 0.6577173623499593, "grad_norm": 1.1897664070129395, "learning_rate": 6.850237877918953e-06, "loss": 3.3260452270507814, "step": 81480 }, { "epoch": 0.6577980836757263, "grad_norm": 0.7953249216079712, "learning_rate": 6.84862240818074e-06, "loss": 2.3652423858642577, "step": 81490 }, { "epoch": 0.6578788050014933, "grad_norm": 1.0144962072372437, "learning_rate": 6.847006938442527e-06, "loss": 2.702159881591797, "step": 81500 }, { "epoch": 0.6579595263272604, "grad_norm": 1.2739554643630981, "learning_rate": 6.8453914687043135e-06, "loss": 2.7294937133789063, "step": 81510 }, { "epoch": 0.6580402476530275, "grad_norm": 1.806367039680481, "learning_rate": 6.8437759989661e-06, "loss": 2.899898910522461, "step": 81520 }, { "epoch": 0.6581209689787945, "grad_norm": 0.8435367941856384, "learning_rate": 6.842160529227887e-06, "loss": 2.5236743927001952, "step": 81530 }, { "epoch": 0.6582016903045615, "grad_norm": 1.0516035556793213, "learning_rate": 6.840545059489674e-06, "loss": 2.5597824096679687, "step": 81540 }, { "epoch": 0.6582824116303286, "grad_norm": 0.9608477354049683, "learning_rate": 6.838929589751461e-06, "loss": 2.7871366500854493, "step": 81550 }, { "epoch": 0.6583631329560957, "grad_norm": 0.568869411945343, "learning_rate": 6.837314120013248e-06, "loss": 3.021568489074707, "step": 81560 }, { "epoch": 0.6584438542818627, "grad_norm": 0.8064951300621033, "learning_rate": 6.835698650275035e-06, "loss": 2.6858726501464845, "step": 81570 }, { "epoch": 0.6585245756076298, "grad_norm": 1.5730372667312622, "learning_rate": 6.8340831805368214e-06, "loss": 2.895785903930664, "step": 81580 }, { "epoch": 0.6586052969333969, "grad_norm": 0.9741240739822388, "learning_rate": 6.832467710798608e-06, "loss": 2.8674343109130858, "step": 81590 }, { "epoch": 0.6586860182591638, "grad_norm": 1.054268479347229, "learning_rate": 6.830852241060395e-06, "loss": 2.380690574645996, "step": 81600 }, { "epoch": 0.6587667395849309, "grad_norm": 0.9789431691169739, "learning_rate": 6.829236771322182e-06, "loss": 2.82877140045166, "step": 81610 }, { "epoch": 0.658847460910698, "grad_norm": 0.8426504135131836, "learning_rate": 6.827621301583969e-06, "loss": 2.562150001525879, "step": 81620 }, { "epoch": 0.6589281822364651, "grad_norm": 0.9541256427764893, "learning_rate": 6.826005831845756e-06, "loss": 2.6533960342407226, "step": 81630 }, { "epoch": 0.6590089035622321, "grad_norm": 1.1704624891281128, "learning_rate": 6.8243903621075425e-06, "loss": 2.400558853149414, "step": 81640 }, { "epoch": 0.6590896248879992, "grad_norm": 0.8634603023529053, "learning_rate": 6.822774892369329e-06, "loss": 2.616545867919922, "step": 81650 }, { "epoch": 0.6591703462137662, "grad_norm": 1.5268174409866333, "learning_rate": 6.821159422631116e-06, "loss": 2.5793889999389648, "step": 81660 }, { "epoch": 0.6592510675395332, "grad_norm": 0.7424960136413574, "learning_rate": 6.819543952892903e-06, "loss": 2.61217041015625, "step": 81670 }, { "epoch": 0.6593317888653003, "grad_norm": 1.2455494403839111, "learning_rate": 6.81792848315469e-06, "loss": 2.6849918365478516, "step": 81680 }, { "epoch": 0.6594125101910674, "grad_norm": 1.1401300430297852, "learning_rate": 6.816313013416477e-06, "loss": 2.511952018737793, "step": 81690 }, { "epoch": 0.6594932315168345, "grad_norm": 1.2920103073120117, "learning_rate": 6.8146975436782636e-06, "loss": 2.834807014465332, "step": 81700 }, { "epoch": 0.6595739528426015, "grad_norm": 0.7646840810775757, "learning_rate": 6.81308207394005e-06, "loss": 2.945970153808594, "step": 81710 }, { "epoch": 0.6596546741683685, "grad_norm": 1.256133794784546, "learning_rate": 6.811466604201837e-06, "loss": 2.5197973251342773, "step": 81720 }, { "epoch": 0.6597353954941356, "grad_norm": 0.8147668838500977, "learning_rate": 6.809851134463624e-06, "loss": 3.219843292236328, "step": 81730 }, { "epoch": 0.6598161168199026, "grad_norm": 0.9009051322937012, "learning_rate": 6.808235664725411e-06, "loss": 3.115155029296875, "step": 81740 }, { "epoch": 0.6598968381456697, "grad_norm": 1.039380431175232, "learning_rate": 6.806620194987198e-06, "loss": 2.575634765625, "step": 81750 }, { "epoch": 0.6599775594714368, "grad_norm": 0.9718645215034485, "learning_rate": 6.805004725248985e-06, "loss": 2.521068000793457, "step": 81760 }, { "epoch": 0.6600582807972039, "grad_norm": 1.371200442314148, "learning_rate": 6.8033892555107715e-06, "loss": 2.45048828125, "step": 81770 }, { "epoch": 0.6601390021229708, "grad_norm": 2.035709857940674, "learning_rate": 6.801773785772558e-06, "loss": 2.4928565979003907, "step": 81780 }, { "epoch": 0.6602197234487379, "grad_norm": 1.0033128261566162, "learning_rate": 6.800158316034345e-06, "loss": 2.7010847091674806, "step": 81790 }, { "epoch": 0.660300444774505, "grad_norm": 0.8230739831924438, "learning_rate": 6.798542846296132e-06, "loss": 2.741726875305176, "step": 81800 }, { "epoch": 0.660381166100272, "grad_norm": 1.071781039237976, "learning_rate": 6.796927376557919e-06, "loss": 2.922112464904785, "step": 81810 }, { "epoch": 0.6604618874260391, "grad_norm": 0.8153987526893616, "learning_rate": 6.795311906819706e-06, "loss": 2.830620765686035, "step": 81820 }, { "epoch": 0.6605426087518061, "grad_norm": 0.7704731822013855, "learning_rate": 6.7936964370814926e-06, "loss": 2.9155755996704102, "step": 81830 }, { "epoch": 0.6606233300775732, "grad_norm": 0.9644628167152405, "learning_rate": 6.792080967343279e-06, "loss": 2.709680938720703, "step": 81840 }, { "epoch": 0.6607040514033402, "grad_norm": 0.7999428510665894, "learning_rate": 6.790465497605066e-06, "loss": 2.596879577636719, "step": 81850 }, { "epoch": 0.6607847727291073, "grad_norm": 0.9190492630004883, "learning_rate": 6.788850027866853e-06, "loss": 3.008677864074707, "step": 81860 }, { "epoch": 0.6608654940548744, "grad_norm": 0.6086845993995667, "learning_rate": 6.78723455812864e-06, "loss": 2.87335262298584, "step": 81870 }, { "epoch": 0.6609462153806415, "grad_norm": 0.7589945793151855, "learning_rate": 6.785619088390427e-06, "loss": 2.3861162185668947, "step": 81880 }, { "epoch": 0.6610269367064084, "grad_norm": 3.326256513595581, "learning_rate": 6.784003618652214e-06, "loss": 3.6391082763671876, "step": 81890 }, { "epoch": 0.6611076580321755, "grad_norm": 1.0164142847061157, "learning_rate": 6.7823881489140005e-06, "loss": 2.551529884338379, "step": 81900 }, { "epoch": 0.6611883793579426, "grad_norm": 1.1790012121200562, "learning_rate": 6.780772679175787e-06, "loss": 2.3576635360717773, "step": 81910 }, { "epoch": 0.6612691006837096, "grad_norm": 1.236528992652893, "learning_rate": 6.779157209437575e-06, "loss": 2.6456636428833007, "step": 81920 }, { "epoch": 0.6613498220094767, "grad_norm": 1.1043341159820557, "learning_rate": 6.777541739699362e-06, "loss": 2.7330984115600585, "step": 81930 }, { "epoch": 0.6614305433352438, "grad_norm": 0.7984015941619873, "learning_rate": 6.775926269961149e-06, "loss": 2.712654685974121, "step": 81940 }, { "epoch": 0.6615112646610108, "grad_norm": 0.9916719198226929, "learning_rate": 6.7743108002229356e-06, "loss": 2.4975303649902343, "step": 81950 }, { "epoch": 0.6615919859867778, "grad_norm": 1.0758720636367798, "learning_rate": 6.772695330484722e-06, "loss": 3.404970169067383, "step": 81960 }, { "epoch": 0.6616727073125449, "grad_norm": 0.9538899660110474, "learning_rate": 6.771079860746509e-06, "loss": 2.6718418121337892, "step": 81970 }, { "epoch": 0.661753428638312, "grad_norm": 0.6445795893669128, "learning_rate": 6.769464391008296e-06, "loss": 3.0975446701049805, "step": 81980 }, { "epoch": 0.661834149964079, "grad_norm": 1.417146921157837, "learning_rate": 6.767848921270083e-06, "loss": 2.7744726181030273, "step": 81990 }, { "epoch": 0.661914871289846, "grad_norm": 1.3943452835083008, "learning_rate": 6.76623345153187e-06, "loss": 2.989505577087402, "step": 82000 }, { "epoch": 0.6619955926156131, "grad_norm": 0.8863731622695923, "learning_rate": 6.764617981793657e-06, "loss": 2.53798770904541, "step": 82010 }, { "epoch": 0.6620763139413802, "grad_norm": 1.0932374000549316, "learning_rate": 6.7630025120554435e-06, "loss": 3.858412170410156, "step": 82020 }, { "epoch": 0.6621570352671472, "grad_norm": 1.1199733018875122, "learning_rate": 6.76138704231723e-06, "loss": 2.5597501754760743, "step": 82030 }, { "epoch": 0.6622377565929143, "grad_norm": 1.0283139944076538, "learning_rate": 6.759771572579017e-06, "loss": 2.8528757095336914, "step": 82040 }, { "epoch": 0.6623184779186814, "grad_norm": 0.7556042075157166, "learning_rate": 6.758156102840804e-06, "loss": 2.6808815002441406, "step": 82050 }, { "epoch": 0.6623991992444483, "grad_norm": 0.8866422772407532, "learning_rate": 6.756540633102591e-06, "loss": 2.1939149856567384, "step": 82060 }, { "epoch": 0.6624799205702154, "grad_norm": 0.9413323998451233, "learning_rate": 6.754925163364378e-06, "loss": 2.522201156616211, "step": 82070 }, { "epoch": 0.6625606418959825, "grad_norm": 1.0473194122314453, "learning_rate": 6.7533096936261646e-06, "loss": 2.5401859283447266, "step": 82080 }, { "epoch": 0.6626413632217496, "grad_norm": 0.80257648229599, "learning_rate": 6.751694223887951e-06, "loss": 2.8353395462036133, "step": 82090 }, { "epoch": 0.6627220845475166, "grad_norm": 1.070652723312378, "learning_rate": 6.750078754149738e-06, "loss": 2.6164003372192384, "step": 82100 }, { "epoch": 0.6628028058732837, "grad_norm": 1.210663914680481, "learning_rate": 6.748463284411525e-06, "loss": 2.4354257583618164, "step": 82110 }, { "epoch": 0.6628835271990507, "grad_norm": 1.2336634397506714, "learning_rate": 6.746847814673312e-06, "loss": 2.774247932434082, "step": 82120 }, { "epoch": 0.6629642485248177, "grad_norm": 1.0901546478271484, "learning_rate": 6.745232344935099e-06, "loss": 2.804912567138672, "step": 82130 }, { "epoch": 0.6630449698505848, "grad_norm": 1.343440294265747, "learning_rate": 6.743616875196886e-06, "loss": 2.4934484481811525, "step": 82140 }, { "epoch": 0.6631256911763519, "grad_norm": 0.8351832628250122, "learning_rate": 6.7420014054586725e-06, "loss": 2.736014175415039, "step": 82150 }, { "epoch": 0.663206412502119, "grad_norm": 0.7346866130828857, "learning_rate": 6.740385935720459e-06, "loss": 3.124318313598633, "step": 82160 }, { "epoch": 0.663287133827886, "grad_norm": 0.9463285803794861, "learning_rate": 6.738770465982246e-06, "loss": 2.819292449951172, "step": 82170 }, { "epoch": 0.663367855153653, "grad_norm": 0.8605131506919861, "learning_rate": 6.737154996244033e-06, "loss": 2.9702281951904297, "step": 82180 }, { "epoch": 0.6634485764794201, "grad_norm": 0.6847196221351624, "learning_rate": 6.73553952650582e-06, "loss": 2.949227523803711, "step": 82190 }, { "epoch": 0.6635292978051871, "grad_norm": 1.3184422254562378, "learning_rate": 6.733924056767607e-06, "loss": 2.690188407897949, "step": 82200 }, { "epoch": 0.6636100191309542, "grad_norm": 1.138486623764038, "learning_rate": 6.7323085870293936e-06, "loss": 2.3884729385375976, "step": 82210 }, { "epoch": 0.6636907404567213, "grad_norm": 0.7245583534240723, "learning_rate": 6.73069311729118e-06, "loss": 2.481344985961914, "step": 82220 }, { "epoch": 0.6637714617824884, "grad_norm": 0.9733712673187256, "learning_rate": 6.729077647552967e-06, "loss": 2.506183052062988, "step": 82230 }, { "epoch": 0.6638521831082553, "grad_norm": 1.4054429531097412, "learning_rate": 6.727462177814754e-06, "loss": 2.577555847167969, "step": 82240 }, { "epoch": 0.6639329044340224, "grad_norm": 0.7283883094787598, "learning_rate": 6.725846708076541e-06, "loss": 2.5155284881591795, "step": 82250 }, { "epoch": 0.6640136257597895, "grad_norm": 1.1400507688522339, "learning_rate": 6.724231238338328e-06, "loss": 2.4873186111450196, "step": 82260 }, { "epoch": 0.6640943470855565, "grad_norm": 0.7521345615386963, "learning_rate": 6.722615768600115e-06, "loss": 2.9297231674194335, "step": 82270 }, { "epoch": 0.6641750684113236, "grad_norm": 1.2095264196395874, "learning_rate": 6.7210002988619015e-06, "loss": 2.450339508056641, "step": 82280 }, { "epoch": 0.6642557897370907, "grad_norm": 0.8204910755157471, "learning_rate": 6.719384829123688e-06, "loss": 2.1487005233764647, "step": 82290 }, { "epoch": 0.6643365110628577, "grad_norm": 0.82548588514328, "learning_rate": 6.717769359385475e-06, "loss": 2.8151817321777344, "step": 82300 }, { "epoch": 0.6644172323886247, "grad_norm": 0.6840569972991943, "learning_rate": 6.716153889647262e-06, "loss": 2.7427909851074217, "step": 82310 }, { "epoch": 0.6644979537143918, "grad_norm": 1.0859090089797974, "learning_rate": 6.714538419909049e-06, "loss": 2.6534038543701173, "step": 82320 }, { "epoch": 0.6645786750401589, "grad_norm": 0.8966537117958069, "learning_rate": 6.712922950170836e-06, "loss": 2.7261051177978515, "step": 82330 }, { "epoch": 0.6646593963659259, "grad_norm": 0.7415098547935486, "learning_rate": 6.7113074804326225e-06, "loss": 2.783817100524902, "step": 82340 }, { "epoch": 0.6647401176916929, "grad_norm": 1.5226325988769531, "learning_rate": 6.709692010694409e-06, "loss": 2.7644649505615235, "step": 82350 }, { "epoch": 0.66482083901746, "grad_norm": 0.916194498538971, "learning_rate": 6.708076540956196e-06, "loss": 2.903362846374512, "step": 82360 }, { "epoch": 0.6649015603432271, "grad_norm": 0.6176349520683289, "learning_rate": 6.706461071217985e-06, "loss": 2.3750017166137694, "step": 82370 }, { "epoch": 0.6649822816689941, "grad_norm": 0.7143678069114685, "learning_rate": 6.704845601479772e-06, "loss": 2.5149946212768555, "step": 82380 }, { "epoch": 0.6650630029947612, "grad_norm": 0.7085672616958618, "learning_rate": 6.7032301317415585e-06, "loss": 2.91719970703125, "step": 82390 }, { "epoch": 0.6651437243205283, "grad_norm": 0.8418422341346741, "learning_rate": 6.701614662003345e-06, "loss": 2.693123435974121, "step": 82400 }, { "epoch": 0.6652244456462953, "grad_norm": 0.7118446826934814, "learning_rate": 6.699999192265132e-06, "loss": 2.8060415267944334, "step": 82410 }, { "epoch": 0.6653051669720623, "grad_norm": 0.9115472435951233, "learning_rate": 6.698383722526919e-06, "loss": 2.736497688293457, "step": 82420 }, { "epoch": 0.6653858882978294, "grad_norm": 1.1446137428283691, "learning_rate": 6.696768252788706e-06, "loss": 2.497013282775879, "step": 82430 }, { "epoch": 0.6654666096235965, "grad_norm": 1.561851143836975, "learning_rate": 6.695152783050493e-06, "loss": 2.8284473419189453, "step": 82440 }, { "epoch": 0.6655473309493635, "grad_norm": 1.0886081457138062, "learning_rate": 6.6935373133122795e-06, "loss": 2.5189218521118164, "step": 82450 }, { "epoch": 0.6656280522751306, "grad_norm": 1.0373008251190186, "learning_rate": 6.691921843574066e-06, "loss": 3.0240930557250976, "step": 82460 }, { "epoch": 0.6657087736008976, "grad_norm": 0.6012349724769592, "learning_rate": 6.690306373835853e-06, "loss": 2.576144981384277, "step": 82470 }, { "epoch": 0.6657894949266647, "grad_norm": 0.8639417886734009, "learning_rate": 6.68869090409764e-06, "loss": 2.850465774536133, "step": 82480 }, { "epoch": 0.6658702162524317, "grad_norm": 0.9450039863586426, "learning_rate": 6.687075434359427e-06, "loss": 2.8342473983764647, "step": 82490 }, { "epoch": 0.6659509375781988, "grad_norm": 0.811549961566925, "learning_rate": 6.685459964621214e-06, "loss": 3.3640377044677736, "step": 82500 }, { "epoch": 0.6660316589039659, "grad_norm": 1.0255407094955444, "learning_rate": 6.683844494883001e-06, "loss": 2.479593276977539, "step": 82510 }, { "epoch": 0.6661123802297328, "grad_norm": 1.049407958984375, "learning_rate": 6.6822290251447875e-06, "loss": 2.7609472274780273, "step": 82520 }, { "epoch": 0.6661931015554999, "grad_norm": 0.7124601006507874, "learning_rate": 6.680613555406574e-06, "loss": 2.6979650497436523, "step": 82530 }, { "epoch": 0.666273822881267, "grad_norm": 1.1735762357711792, "learning_rate": 6.678998085668361e-06, "loss": 2.561970329284668, "step": 82540 }, { "epoch": 0.6663545442070341, "grad_norm": 1.8223694562911987, "learning_rate": 6.677382615930148e-06, "loss": 2.5529842376708984, "step": 82550 }, { "epoch": 0.6664352655328011, "grad_norm": 1.494564414024353, "learning_rate": 6.675767146191935e-06, "loss": 2.4204254150390625, "step": 82560 }, { "epoch": 0.6665159868585682, "grad_norm": 0.8308391571044922, "learning_rate": 6.674151676453722e-06, "loss": 2.8437021255493162, "step": 82570 }, { "epoch": 0.6665967081843353, "grad_norm": 1.0650063753128052, "learning_rate": 6.6725362067155085e-06, "loss": 2.2843433380126954, "step": 82580 }, { "epoch": 0.6666774295101022, "grad_norm": 0.9946750402450562, "learning_rate": 6.670920736977295e-06, "loss": 2.396717834472656, "step": 82590 }, { "epoch": 0.6667581508358693, "grad_norm": 0.9686405658721924, "learning_rate": 6.669305267239082e-06, "loss": 2.5852323532104493, "step": 82600 }, { "epoch": 0.6668388721616364, "grad_norm": 1.3064258098602295, "learning_rate": 6.667689797500869e-06, "loss": 2.812165451049805, "step": 82610 }, { "epoch": 0.6669195934874035, "grad_norm": 0.8603554368019104, "learning_rate": 6.666074327762656e-06, "loss": 2.2587188720703124, "step": 82620 }, { "epoch": 0.6670003148131705, "grad_norm": 1.0137912034988403, "learning_rate": 6.664458858024443e-06, "loss": 2.5623405456542967, "step": 82630 }, { "epoch": 0.6670810361389375, "grad_norm": 0.6463884115219116, "learning_rate": 6.66284338828623e-06, "loss": 2.653421974182129, "step": 82640 }, { "epoch": 0.6671617574647046, "grad_norm": 1.2112884521484375, "learning_rate": 6.6612279185480165e-06, "loss": 3.2735809326171874, "step": 82650 }, { "epoch": 0.6672424787904716, "grad_norm": 1.0853885412216187, "learning_rate": 6.659612448809803e-06, "loss": 3.097865867614746, "step": 82660 }, { "epoch": 0.6673232001162387, "grad_norm": 0.9387573003768921, "learning_rate": 6.65799697907159e-06, "loss": 2.5473119735717775, "step": 82670 }, { "epoch": 0.6674039214420058, "grad_norm": 1.0041422843933105, "learning_rate": 6.656381509333377e-06, "loss": 2.8107206344604494, "step": 82680 }, { "epoch": 0.6674846427677729, "grad_norm": 1.0889430046081543, "learning_rate": 6.654766039595164e-06, "loss": 2.694064140319824, "step": 82690 }, { "epoch": 0.6675653640935398, "grad_norm": 0.804857611656189, "learning_rate": 6.653150569856951e-06, "loss": 2.407949447631836, "step": 82700 }, { "epoch": 0.6676460854193069, "grad_norm": 0.7956077456474304, "learning_rate": 6.6515351001187375e-06, "loss": 2.9450254440307617, "step": 82710 }, { "epoch": 0.667726806745074, "grad_norm": 0.9290469884872437, "learning_rate": 6.649919630380524e-06, "loss": 2.6080862045288087, "step": 82720 }, { "epoch": 0.667807528070841, "grad_norm": 1.0421342849731445, "learning_rate": 6.648304160642311e-06, "loss": 2.66156005859375, "step": 82730 }, { "epoch": 0.6678882493966081, "grad_norm": 0.9317893385887146, "learning_rate": 6.646688690904098e-06, "loss": 2.4523927688598635, "step": 82740 }, { "epoch": 0.6679689707223752, "grad_norm": 1.2302052974700928, "learning_rate": 6.645073221165885e-06, "loss": 2.428336334228516, "step": 82750 }, { "epoch": 0.6680496920481422, "grad_norm": 0.9719445109367371, "learning_rate": 6.643457751427672e-06, "loss": 3.0194826126098633, "step": 82760 }, { "epoch": 0.6681304133739092, "grad_norm": 0.941655158996582, "learning_rate": 6.641842281689459e-06, "loss": 2.3443376541137697, "step": 82770 }, { "epoch": 0.6682111346996763, "grad_norm": 0.9763123989105225, "learning_rate": 6.6402268119512455e-06, "loss": 2.5151695251464843, "step": 82780 }, { "epoch": 0.6682918560254434, "grad_norm": 1.1216577291488647, "learning_rate": 6.638611342213032e-06, "loss": 2.4361114501953125, "step": 82790 }, { "epoch": 0.6683725773512104, "grad_norm": 1.027022123336792, "learning_rate": 6.63699587247482e-06, "loss": 2.8672147750854493, "step": 82800 }, { "epoch": 0.6684532986769774, "grad_norm": 0.9260404706001282, "learning_rate": 6.635380402736607e-06, "loss": 2.7444549560546876, "step": 82810 }, { "epoch": 0.6685340200027445, "grad_norm": 0.9477510452270508, "learning_rate": 6.633764932998394e-06, "loss": 2.8334253311157225, "step": 82820 }, { "epoch": 0.6686147413285116, "grad_norm": 1.3053282499313354, "learning_rate": 6.6321494632601805e-06, "loss": 2.6707630157470703, "step": 82830 }, { "epoch": 0.6686954626542786, "grad_norm": 0.9781056046485901, "learning_rate": 6.630533993521967e-06, "loss": 2.986782455444336, "step": 82840 }, { "epoch": 0.6687761839800457, "grad_norm": 0.8992201685905457, "learning_rate": 6.628918523783754e-06, "loss": 2.4981204986572267, "step": 82850 }, { "epoch": 0.6688569053058128, "grad_norm": 1.0518510341644287, "learning_rate": 6.627303054045541e-06, "loss": 2.51946964263916, "step": 82860 }, { "epoch": 0.6689376266315799, "grad_norm": 1.0385595560073853, "learning_rate": 6.625687584307328e-06, "loss": 2.4947086334228517, "step": 82870 }, { "epoch": 0.6690183479573468, "grad_norm": 1.0441092252731323, "learning_rate": 6.624072114569115e-06, "loss": 2.7444944381713867, "step": 82880 }, { "epoch": 0.6690990692831139, "grad_norm": 1.7521260976791382, "learning_rate": 6.622456644830902e-06, "loss": 3.1556657791137694, "step": 82890 }, { "epoch": 0.669179790608881, "grad_norm": 0.9079926013946533, "learning_rate": 6.6208411750926884e-06, "loss": 2.7734683990478515, "step": 82900 }, { "epoch": 0.669260511934648, "grad_norm": 0.8813535571098328, "learning_rate": 6.619225705354475e-06, "loss": 2.2873653411865233, "step": 82910 }, { "epoch": 0.6693412332604151, "grad_norm": 0.7103505730628967, "learning_rate": 6.617610235616262e-06, "loss": 2.77415771484375, "step": 82920 }, { "epoch": 0.6694219545861821, "grad_norm": 1.1910626888275146, "learning_rate": 6.615994765878049e-06, "loss": 2.3167495727539062, "step": 82930 }, { "epoch": 0.6695026759119492, "grad_norm": 0.656619131565094, "learning_rate": 6.614379296139836e-06, "loss": 2.425151062011719, "step": 82940 }, { "epoch": 0.6695833972377162, "grad_norm": 0.6658596992492676, "learning_rate": 6.612763826401623e-06, "loss": 2.404729652404785, "step": 82950 }, { "epoch": 0.6696641185634833, "grad_norm": 0.6781814098358154, "learning_rate": 6.6111483566634095e-06, "loss": 2.8673093795776365, "step": 82960 }, { "epoch": 0.6697448398892504, "grad_norm": 0.8161017894744873, "learning_rate": 6.609532886925196e-06, "loss": 2.8880590438842773, "step": 82970 }, { "epoch": 0.6698255612150174, "grad_norm": 0.8556073307991028, "learning_rate": 6.607917417186983e-06, "loss": 2.5371820449829103, "step": 82980 }, { "epoch": 0.6699062825407844, "grad_norm": 0.6716298460960388, "learning_rate": 6.60630194744877e-06, "loss": 2.3769712448120117, "step": 82990 }, { "epoch": 0.6699870038665515, "grad_norm": 1.1774731874465942, "learning_rate": 6.604686477710557e-06, "loss": 2.572041320800781, "step": 83000 }, { "epoch": 0.6700677251923186, "grad_norm": 0.6472607851028442, "learning_rate": 6.603071007972344e-06, "loss": 2.5217647552490234, "step": 83010 }, { "epoch": 0.6701484465180856, "grad_norm": 1.2110867500305176, "learning_rate": 6.601455538234131e-06, "loss": 2.684562873840332, "step": 83020 }, { "epoch": 0.6702291678438527, "grad_norm": 2.0359740257263184, "learning_rate": 6.5998400684959174e-06, "loss": 2.4210432052612303, "step": 83030 }, { "epoch": 0.6703098891696198, "grad_norm": 1.154570460319519, "learning_rate": 6.598224598757704e-06, "loss": 2.706715774536133, "step": 83040 }, { "epoch": 0.6703906104953867, "grad_norm": 1.213505744934082, "learning_rate": 6.596609129019491e-06, "loss": 2.6399953842163084, "step": 83050 }, { "epoch": 0.6704713318211538, "grad_norm": 1.4244005680084229, "learning_rate": 6.594993659281278e-06, "loss": 2.7154197692871094, "step": 83060 }, { "epoch": 0.6705520531469209, "grad_norm": 1.230114459991455, "learning_rate": 6.593378189543065e-06, "loss": 2.575921821594238, "step": 83070 }, { "epoch": 0.670632774472688, "grad_norm": 1.153717041015625, "learning_rate": 6.591762719804852e-06, "loss": 2.7635734558105467, "step": 83080 }, { "epoch": 0.670713495798455, "grad_norm": 0.6459276676177979, "learning_rate": 6.5901472500666385e-06, "loss": 2.5845657348632813, "step": 83090 }, { "epoch": 0.670794217124222, "grad_norm": 1.2384915351867676, "learning_rate": 6.588531780328425e-06, "loss": 2.401171875, "step": 83100 }, { "epoch": 0.6708749384499891, "grad_norm": 0.8791477084159851, "learning_rate": 6.586916310590212e-06, "loss": 2.469534492492676, "step": 83110 }, { "epoch": 0.6709556597757561, "grad_norm": 1.1964399814605713, "learning_rate": 6.585300840851999e-06, "loss": 2.8277454376220703, "step": 83120 }, { "epoch": 0.6710363811015232, "grad_norm": 0.6082454323768616, "learning_rate": 6.583685371113786e-06, "loss": 3.1526187896728515, "step": 83130 }, { "epoch": 0.6711171024272903, "grad_norm": 1.0298129320144653, "learning_rate": 6.582069901375573e-06, "loss": 2.6625593185424803, "step": 83140 }, { "epoch": 0.6711978237530574, "grad_norm": 0.937691330909729, "learning_rate": 6.58045443163736e-06, "loss": 2.6217061996459963, "step": 83150 }, { "epoch": 0.6712785450788243, "grad_norm": 0.7199431657791138, "learning_rate": 6.5788389618991464e-06, "loss": 2.7117889404296873, "step": 83160 }, { "epoch": 0.6713592664045914, "grad_norm": 0.7301315069198608, "learning_rate": 6.577223492160933e-06, "loss": 2.6691680908203126, "step": 83170 }, { "epoch": 0.6714399877303585, "grad_norm": 0.6527705788612366, "learning_rate": 6.57560802242272e-06, "loss": 2.630413818359375, "step": 83180 }, { "epoch": 0.6715207090561255, "grad_norm": 0.8308941721916199, "learning_rate": 6.573992552684507e-06, "loss": 2.43851261138916, "step": 83190 }, { "epoch": 0.6716014303818926, "grad_norm": 0.7695600986480713, "learning_rate": 6.572377082946294e-06, "loss": 2.566996955871582, "step": 83200 }, { "epoch": 0.6716821517076597, "grad_norm": 0.8271307945251465, "learning_rate": 6.570761613208081e-06, "loss": 2.670648765563965, "step": 83210 }, { "epoch": 0.6717628730334267, "grad_norm": 0.7755569815635681, "learning_rate": 6.5691461434698675e-06, "loss": 2.6730690002441406, "step": 83220 }, { "epoch": 0.6718435943591937, "grad_norm": 0.982600212097168, "learning_rate": 6.567530673731654e-06, "loss": 2.905230712890625, "step": 83230 }, { "epoch": 0.6719243156849608, "grad_norm": 0.9259819984436035, "learning_rate": 6.565915203993441e-06, "loss": 2.229401969909668, "step": 83240 }, { "epoch": 0.6720050370107279, "grad_norm": 1.1227070093154907, "learning_rate": 6.564299734255228e-06, "loss": 2.6346357345581053, "step": 83250 }, { "epoch": 0.6720857583364949, "grad_norm": 1.2392680644989014, "learning_rate": 6.562684264517015e-06, "loss": 2.844058036804199, "step": 83260 }, { "epoch": 0.672166479662262, "grad_norm": 0.8583407998085022, "learning_rate": 6.561068794778802e-06, "loss": 2.7787601470947267, "step": 83270 }, { "epoch": 0.672247200988029, "grad_norm": 0.867264449596405, "learning_rate": 6.559453325040589e-06, "loss": 2.5736351013183594, "step": 83280 }, { "epoch": 0.6723279223137961, "grad_norm": 0.5129955410957336, "learning_rate": 6.5578378553023754e-06, "loss": 2.832487106323242, "step": 83290 }, { "epoch": 0.6724086436395631, "grad_norm": 0.8215610980987549, "learning_rate": 6.556222385564162e-06, "loss": 2.5737445831298826, "step": 83300 }, { "epoch": 0.6724893649653302, "grad_norm": 1.1363139152526855, "learning_rate": 6.55460691582595e-06, "loss": 2.5075525283813476, "step": 83310 }, { "epoch": 0.6725700862910973, "grad_norm": 1.296718716621399, "learning_rate": 6.552991446087737e-06, "loss": 2.589103126525879, "step": 83320 }, { "epoch": 0.6726508076168644, "grad_norm": 0.8178144693374634, "learning_rate": 6.551375976349524e-06, "loss": 2.481120300292969, "step": 83330 }, { "epoch": 0.6727315289426313, "grad_norm": 0.6568849086761475, "learning_rate": 6.5497605066113105e-06, "loss": 2.5766210556030273, "step": 83340 }, { "epoch": 0.6728122502683984, "grad_norm": 1.5876061916351318, "learning_rate": 6.548145036873097e-06, "loss": 2.425765609741211, "step": 83350 }, { "epoch": 0.6728929715941655, "grad_norm": 0.7752248644828796, "learning_rate": 6.546529567134884e-06, "loss": 2.459032440185547, "step": 83360 }, { "epoch": 0.6729736929199325, "grad_norm": 0.8951810002326965, "learning_rate": 6.544914097396671e-06, "loss": 2.608418273925781, "step": 83370 }, { "epoch": 0.6730544142456996, "grad_norm": 1.199763536453247, "learning_rate": 6.543298627658458e-06, "loss": 2.6283390045166017, "step": 83380 }, { "epoch": 0.6731351355714666, "grad_norm": 0.9227233529090881, "learning_rate": 6.541683157920245e-06, "loss": 2.985845947265625, "step": 83390 }, { "epoch": 0.6732158568972337, "grad_norm": 1.0042681694030762, "learning_rate": 6.540067688182032e-06, "loss": 2.830393409729004, "step": 83400 }, { "epoch": 0.6732965782230007, "grad_norm": 0.8557461500167847, "learning_rate": 6.538452218443818e-06, "loss": 3.330055999755859, "step": 83410 }, { "epoch": 0.6733772995487678, "grad_norm": 1.2794153690338135, "learning_rate": 6.536836748705605e-06, "loss": 2.546035385131836, "step": 83420 }, { "epoch": 0.6734580208745349, "grad_norm": 0.9588883519172668, "learning_rate": 6.535221278967392e-06, "loss": 2.8229253768920897, "step": 83430 }, { "epoch": 0.6735387422003019, "grad_norm": 1.3262412548065186, "learning_rate": 6.533605809229179e-06, "loss": 2.899209976196289, "step": 83440 }, { "epoch": 0.6736194635260689, "grad_norm": 0.8779447078704834, "learning_rate": 6.531990339490966e-06, "loss": 2.6220447540283205, "step": 83450 }, { "epoch": 0.673700184851836, "grad_norm": 1.9174972772598267, "learning_rate": 6.530374869752753e-06, "loss": 2.9047401428222654, "step": 83460 }, { "epoch": 0.6737809061776031, "grad_norm": 1.001291275024414, "learning_rate": 6.5287594000145395e-06, "loss": 2.8183717727661133, "step": 83470 }, { "epoch": 0.6738616275033701, "grad_norm": 1.7144221067428589, "learning_rate": 6.527143930276326e-06, "loss": 3.016620063781738, "step": 83480 }, { "epoch": 0.6739423488291372, "grad_norm": 0.9016583561897278, "learning_rate": 6.525528460538113e-06, "loss": 2.644700813293457, "step": 83490 }, { "epoch": 0.6740230701549043, "grad_norm": 0.6004673838615417, "learning_rate": 6.5239129907999e-06, "loss": 3.364696502685547, "step": 83500 }, { "epoch": 0.6741037914806712, "grad_norm": 1.6008926630020142, "learning_rate": 6.522297521061687e-06, "loss": 2.6407293319702148, "step": 83510 }, { "epoch": 0.6741845128064383, "grad_norm": 0.8169143199920654, "learning_rate": 6.520682051323474e-06, "loss": 2.343654823303223, "step": 83520 }, { "epoch": 0.6742652341322054, "grad_norm": 0.9226986765861511, "learning_rate": 6.5190665815852606e-06, "loss": 2.691971778869629, "step": 83530 }, { "epoch": 0.6743459554579725, "grad_norm": 0.8208984732627869, "learning_rate": 6.517451111847047e-06, "loss": 2.408664512634277, "step": 83540 }, { "epoch": 0.6744266767837395, "grad_norm": 0.7401043176651001, "learning_rate": 6.515835642108834e-06, "loss": 2.859402084350586, "step": 83550 }, { "epoch": 0.6745073981095066, "grad_norm": 0.7804547548294067, "learning_rate": 6.514220172370621e-06, "loss": 2.7036386489868165, "step": 83560 }, { "epoch": 0.6745881194352736, "grad_norm": 1.232064127922058, "learning_rate": 6.512604702632408e-06, "loss": 2.4141002655029298, "step": 83570 }, { "epoch": 0.6746688407610406, "grad_norm": 0.7441924214363098, "learning_rate": 6.510989232894195e-06, "loss": 2.4401140213012695, "step": 83580 }, { "epoch": 0.6747495620868077, "grad_norm": 0.8694127202033997, "learning_rate": 6.509373763155982e-06, "loss": 2.777903175354004, "step": 83590 }, { "epoch": 0.6748302834125748, "grad_norm": 0.5926547646522522, "learning_rate": 6.5077582934177685e-06, "loss": 2.6662527084350587, "step": 83600 }, { "epoch": 0.6749110047383419, "grad_norm": 0.926885187625885, "learning_rate": 6.506142823679555e-06, "loss": 2.528324508666992, "step": 83610 }, { "epoch": 0.6749917260641088, "grad_norm": 0.8761955499649048, "learning_rate": 6.504527353941343e-06, "loss": 2.442087745666504, "step": 83620 }, { "epoch": 0.6750724473898759, "grad_norm": 1.2767233848571777, "learning_rate": 6.50291188420313e-06, "loss": 2.4703880310058595, "step": 83630 }, { "epoch": 0.675153168715643, "grad_norm": 0.7658265233039856, "learning_rate": 6.501296414464917e-06, "loss": 3.046538734436035, "step": 83640 }, { "epoch": 0.67523389004141, "grad_norm": 1.2847607135772705, "learning_rate": 6.4996809447267036e-06, "loss": 2.7194545745849608, "step": 83650 }, { "epoch": 0.6753146113671771, "grad_norm": 0.9896910786628723, "learning_rate": 6.49806547498849e-06, "loss": 2.5970474243164063, "step": 83660 }, { "epoch": 0.6753953326929442, "grad_norm": 0.6736031174659729, "learning_rate": 6.496450005250278e-06, "loss": 2.616253662109375, "step": 83670 }, { "epoch": 0.6754760540187112, "grad_norm": 1.1801767349243164, "learning_rate": 6.494834535512065e-06, "loss": 2.8323131561279298, "step": 83680 }, { "epoch": 0.6755567753444782, "grad_norm": 1.6583828926086426, "learning_rate": 6.493219065773852e-06, "loss": 2.763951873779297, "step": 83690 }, { "epoch": 0.6756374966702453, "grad_norm": 0.5845260620117188, "learning_rate": 6.491603596035639e-06, "loss": 2.3265880584716796, "step": 83700 }, { "epoch": 0.6757182179960124, "grad_norm": 1.1819785833358765, "learning_rate": 6.4899881262974255e-06, "loss": 2.641061019897461, "step": 83710 }, { "epoch": 0.6757989393217794, "grad_norm": 1.422316074371338, "learning_rate": 6.488372656559212e-06, "loss": 2.6640689849853514, "step": 83720 }, { "epoch": 0.6758796606475465, "grad_norm": 0.5616673231124878, "learning_rate": 6.486757186820999e-06, "loss": 2.486018753051758, "step": 83730 }, { "epoch": 0.6759603819733135, "grad_norm": 0.8752912878990173, "learning_rate": 6.485141717082786e-06, "loss": 2.6597686767578126, "step": 83740 }, { "epoch": 0.6760411032990806, "grad_norm": 0.9194376468658447, "learning_rate": 6.483526247344573e-06, "loss": 2.6075244903564454, "step": 83750 }, { "epoch": 0.6761218246248476, "grad_norm": 1.5334460735321045, "learning_rate": 6.48191077760636e-06, "loss": 2.565613555908203, "step": 83760 }, { "epoch": 0.6762025459506147, "grad_norm": 0.6174551248550415, "learning_rate": 6.4802953078681466e-06, "loss": 2.955185890197754, "step": 83770 }, { "epoch": 0.6762832672763818, "grad_norm": 0.9763137698173523, "learning_rate": 6.478679838129933e-06, "loss": 2.4614704132080076, "step": 83780 }, { "epoch": 0.6763639886021487, "grad_norm": 0.956493616104126, "learning_rate": 6.47706436839172e-06, "loss": 2.4754562377929688, "step": 83790 }, { "epoch": 0.6764447099279158, "grad_norm": 1.089055061340332, "learning_rate": 6.475448898653507e-06, "loss": 2.412095069885254, "step": 83800 }, { "epoch": 0.6765254312536829, "grad_norm": 0.6190563440322876, "learning_rate": 6.473833428915294e-06, "loss": 2.4106267929077148, "step": 83810 }, { "epoch": 0.67660615257945, "grad_norm": 0.7278340458869934, "learning_rate": 6.472217959177081e-06, "loss": 2.615039825439453, "step": 83820 }, { "epoch": 0.676686873905217, "grad_norm": 0.9458596110343933, "learning_rate": 6.470602489438868e-06, "loss": 2.5729427337646484, "step": 83830 }, { "epoch": 0.6767675952309841, "grad_norm": 1.4476293325424194, "learning_rate": 6.4689870197006545e-06, "loss": 2.709194564819336, "step": 83840 }, { "epoch": 0.6768483165567512, "grad_norm": 0.7978459000587463, "learning_rate": 6.467371549962441e-06, "loss": 3.139954376220703, "step": 83850 }, { "epoch": 0.6769290378825182, "grad_norm": 0.9363816380500793, "learning_rate": 6.465756080224228e-06, "loss": 2.258066940307617, "step": 83860 }, { "epoch": 0.6770097592082852, "grad_norm": 0.9573693871498108, "learning_rate": 6.464140610486015e-06, "loss": 2.3909446716308596, "step": 83870 }, { "epoch": 0.6770904805340523, "grad_norm": 1.0174835920333862, "learning_rate": 6.462525140747802e-06, "loss": 2.4086511611938475, "step": 83880 }, { "epoch": 0.6771712018598194, "grad_norm": 1.0114566087722778, "learning_rate": 6.460909671009589e-06, "loss": 2.3789016723632814, "step": 83890 }, { "epoch": 0.6772519231855864, "grad_norm": 0.7076481580734253, "learning_rate": 6.4592942012713756e-06, "loss": 2.6404191970825197, "step": 83900 }, { "epoch": 0.6773326445113534, "grad_norm": 0.9560867547988892, "learning_rate": 6.457678731533162e-06, "loss": 2.331801414489746, "step": 83910 }, { "epoch": 0.6774133658371205, "grad_norm": 0.9647845029830933, "learning_rate": 6.456063261794949e-06, "loss": 2.651552200317383, "step": 83920 }, { "epoch": 0.6774940871628876, "grad_norm": 1.1785874366760254, "learning_rate": 6.454447792056736e-06, "loss": 2.9418193817138674, "step": 83930 }, { "epoch": 0.6775748084886546, "grad_norm": 0.7730422616004944, "learning_rate": 6.452832322318523e-06, "loss": 2.404377746582031, "step": 83940 }, { "epoch": 0.6776555298144217, "grad_norm": 1.3096566200256348, "learning_rate": 6.45121685258031e-06, "loss": 2.2428220748901366, "step": 83950 }, { "epoch": 0.6777362511401888, "grad_norm": 1.3751462697982788, "learning_rate": 6.449601382842097e-06, "loss": 2.5653446197509764, "step": 83960 }, { "epoch": 0.6778169724659557, "grad_norm": 0.6015730500221252, "learning_rate": 6.4479859131038835e-06, "loss": 3.137283134460449, "step": 83970 }, { "epoch": 0.6778976937917228, "grad_norm": 1.0623743534088135, "learning_rate": 6.44637044336567e-06, "loss": 2.8923017501831056, "step": 83980 }, { "epoch": 0.6779784151174899, "grad_norm": 0.8282727003097534, "learning_rate": 6.444754973627457e-06, "loss": 2.3802181243896485, "step": 83990 }, { "epoch": 0.678059136443257, "grad_norm": 0.7236531376838684, "learning_rate": 6.443139503889244e-06, "loss": 2.3157575607299803, "step": 84000 }, { "epoch": 0.678139857769024, "grad_norm": 1.333833932876587, "learning_rate": 6.441524034151031e-06, "loss": 2.802815818786621, "step": 84010 }, { "epoch": 0.678220579094791, "grad_norm": 0.7536894679069519, "learning_rate": 6.439908564412818e-06, "loss": 2.5229938507080076, "step": 84020 }, { "epoch": 0.6783013004205581, "grad_norm": 1.372024655342102, "learning_rate": 6.4382930946746046e-06, "loss": 2.657622146606445, "step": 84030 }, { "epoch": 0.6783820217463251, "grad_norm": 1.3587443828582764, "learning_rate": 6.436677624936391e-06, "loss": 2.8576385498046877, "step": 84040 }, { "epoch": 0.6784627430720922, "grad_norm": 1.0270025730133057, "learning_rate": 6.435062155198178e-06, "loss": 2.937653350830078, "step": 84050 }, { "epoch": 0.6785434643978593, "grad_norm": 0.8619361519813538, "learning_rate": 6.433446685459965e-06, "loss": 2.495635223388672, "step": 84060 }, { "epoch": 0.6786241857236264, "grad_norm": 1.1611998081207275, "learning_rate": 6.431831215721752e-06, "loss": 2.2891956329345704, "step": 84070 }, { "epoch": 0.6787049070493933, "grad_norm": 1.0915907621383667, "learning_rate": 6.430215745983539e-06, "loss": 2.88740234375, "step": 84080 }, { "epoch": 0.6787856283751604, "grad_norm": 0.8667702674865723, "learning_rate": 6.428600276245326e-06, "loss": 2.5803789138793944, "step": 84090 }, { "epoch": 0.6788663497009275, "grad_norm": 0.961284875869751, "learning_rate": 6.4269848065071125e-06, "loss": 2.592777442932129, "step": 84100 }, { "epoch": 0.6789470710266945, "grad_norm": 1.7156988382339478, "learning_rate": 6.425369336768899e-06, "loss": 2.257655906677246, "step": 84110 }, { "epoch": 0.6790277923524616, "grad_norm": 1.3077009916305542, "learning_rate": 6.423753867030686e-06, "loss": 2.5909564971923826, "step": 84120 }, { "epoch": 0.6791085136782287, "grad_norm": 3.304203987121582, "learning_rate": 6.422138397292473e-06, "loss": 2.7590789794921875, "step": 84130 }, { "epoch": 0.6791892350039958, "grad_norm": 0.5003132820129395, "learning_rate": 6.42052292755426e-06, "loss": 2.6343624114990236, "step": 84140 }, { "epoch": 0.6792699563297627, "grad_norm": 0.9784436821937561, "learning_rate": 6.418907457816047e-06, "loss": 2.6223495483398436, "step": 84150 }, { "epoch": 0.6793506776555298, "grad_norm": 1.8660809993743896, "learning_rate": 6.4172919880778335e-06, "loss": 2.5303449630737305, "step": 84160 }, { "epoch": 0.6794313989812969, "grad_norm": 0.7030837535858154, "learning_rate": 6.41567651833962e-06, "loss": 2.351601409912109, "step": 84170 }, { "epoch": 0.6795121203070639, "grad_norm": 0.804494321346283, "learning_rate": 6.414061048601407e-06, "loss": 2.455138397216797, "step": 84180 }, { "epoch": 0.679592841632831, "grad_norm": 0.8456012010574341, "learning_rate": 6.412445578863195e-06, "loss": 2.9266204833984375, "step": 84190 }, { "epoch": 0.679673562958598, "grad_norm": 0.7443310618400574, "learning_rate": 6.410830109124982e-06, "loss": 2.338526153564453, "step": 84200 }, { "epoch": 0.6797542842843651, "grad_norm": 2.524066209793091, "learning_rate": 6.409214639386769e-06, "loss": 2.8023775100708006, "step": 84210 }, { "epoch": 0.6798350056101321, "grad_norm": 0.8152661919593811, "learning_rate": 6.4075991696485555e-06, "loss": 3.087344169616699, "step": 84220 }, { "epoch": 0.6799157269358992, "grad_norm": 1.0482081174850464, "learning_rate": 6.405983699910342e-06, "loss": 2.5740024566650392, "step": 84230 }, { "epoch": 0.6799964482616663, "grad_norm": 0.8304738998413086, "learning_rate": 6.404368230172129e-06, "loss": 2.5276506423950194, "step": 84240 }, { "epoch": 0.6800771695874333, "grad_norm": 0.4923657178878784, "learning_rate": 6.402752760433916e-06, "loss": 3.157238006591797, "step": 84250 }, { "epoch": 0.6801578909132003, "grad_norm": 0.6924589276313782, "learning_rate": 6.401137290695703e-06, "loss": 2.7162349700927733, "step": 84260 }, { "epoch": 0.6802386122389674, "grad_norm": 3.241217851638794, "learning_rate": 6.39952182095749e-06, "loss": 3.02526798248291, "step": 84270 }, { "epoch": 0.6803193335647345, "grad_norm": 1.0728094577789307, "learning_rate": 6.3979063512192765e-06, "loss": 2.4627891540527345, "step": 84280 }, { "epoch": 0.6804000548905015, "grad_norm": 1.5388526916503906, "learning_rate": 6.396290881481063e-06, "loss": 2.630784606933594, "step": 84290 }, { "epoch": 0.6804807762162686, "grad_norm": 0.9337969422340393, "learning_rate": 6.39467541174285e-06, "loss": 2.5769912719726564, "step": 84300 }, { "epoch": 0.6805614975420357, "grad_norm": 1.162845253944397, "learning_rate": 6.393059942004637e-06, "loss": 2.789856719970703, "step": 84310 }, { "epoch": 0.6806422188678027, "grad_norm": 0.9409670829772949, "learning_rate": 6.391444472266424e-06, "loss": 2.399208831787109, "step": 84320 }, { "epoch": 0.6807229401935697, "grad_norm": 0.9442139863967896, "learning_rate": 6.389829002528211e-06, "loss": 2.4569005966186523, "step": 84330 }, { "epoch": 0.6808036615193368, "grad_norm": 0.6954894065856934, "learning_rate": 6.388213532789998e-06, "loss": 2.1824323654174806, "step": 84340 }, { "epoch": 0.6808843828451039, "grad_norm": 1.1822665929794312, "learning_rate": 6.3865980630517845e-06, "loss": 2.4630508422851562, "step": 84350 }, { "epoch": 0.6809651041708709, "grad_norm": 0.5426402688026428, "learning_rate": 6.384982593313571e-06, "loss": 2.8734487533569335, "step": 84360 }, { "epoch": 0.681045825496638, "grad_norm": 0.83221834897995, "learning_rate": 6.383367123575358e-06, "loss": 2.716731071472168, "step": 84370 }, { "epoch": 0.681126546822405, "grad_norm": 0.8553023338317871, "learning_rate": 6.381751653837145e-06, "loss": 2.5903770446777346, "step": 84380 }, { "epoch": 0.6812072681481721, "grad_norm": 0.9012615084648132, "learning_rate": 6.380136184098932e-06, "loss": 2.1636343002319336, "step": 84390 }, { "epoch": 0.6812879894739391, "grad_norm": 0.6245599985122681, "learning_rate": 6.378520714360719e-06, "loss": 3.1342304229736326, "step": 84400 }, { "epoch": 0.6813687107997062, "grad_norm": 1.1120309829711914, "learning_rate": 6.3769052446225055e-06, "loss": 2.1154111862182616, "step": 84410 }, { "epoch": 0.6814494321254733, "grad_norm": 1.1841961145401, "learning_rate": 6.375289774884292e-06, "loss": 3.1352569580078127, "step": 84420 }, { "epoch": 0.6815301534512402, "grad_norm": 1.0024466514587402, "learning_rate": 6.373674305146079e-06, "loss": 2.9186141967773436, "step": 84430 }, { "epoch": 0.6816108747770073, "grad_norm": 0.810366690158844, "learning_rate": 6.372058835407866e-06, "loss": 2.969365882873535, "step": 84440 }, { "epoch": 0.6816915961027744, "grad_norm": 1.4829906225204468, "learning_rate": 6.370443365669653e-06, "loss": 2.981037712097168, "step": 84450 }, { "epoch": 0.6817723174285415, "grad_norm": 0.9179831743240356, "learning_rate": 6.36882789593144e-06, "loss": 3.075004768371582, "step": 84460 }, { "epoch": 0.6818530387543085, "grad_norm": 1.1259974241256714, "learning_rate": 6.367212426193227e-06, "loss": 3.2849586486816404, "step": 84470 }, { "epoch": 0.6819337600800756, "grad_norm": 1.0396584272384644, "learning_rate": 6.3655969564550135e-06, "loss": 2.487683868408203, "step": 84480 }, { "epoch": 0.6820144814058426, "grad_norm": 0.6483521461486816, "learning_rate": 6.3639814867168e-06, "loss": 2.6299619674682617, "step": 84490 }, { "epoch": 0.6820952027316096, "grad_norm": 1.9455022811889648, "learning_rate": 6.362366016978587e-06, "loss": 3.3243667602539064, "step": 84500 }, { "epoch": 0.6821759240573767, "grad_norm": 0.5549551844596863, "learning_rate": 6.360750547240374e-06, "loss": 2.740535926818848, "step": 84510 }, { "epoch": 0.6822566453831438, "grad_norm": 0.9101641178131104, "learning_rate": 6.359135077502161e-06, "loss": 2.723356819152832, "step": 84520 }, { "epoch": 0.6823373667089109, "grad_norm": 1.1878807544708252, "learning_rate": 6.357519607763948e-06, "loss": 2.611846351623535, "step": 84530 }, { "epoch": 0.6824180880346778, "grad_norm": 0.8527159690856934, "learning_rate": 6.3559041380257345e-06, "loss": 2.5898584365844726, "step": 84540 }, { "epoch": 0.6824988093604449, "grad_norm": 0.8706998825073242, "learning_rate": 6.354288668287521e-06, "loss": 2.6412942886352537, "step": 84550 }, { "epoch": 0.682579530686212, "grad_norm": 1.3901925086975098, "learning_rate": 6.352673198549308e-06, "loss": 2.4930130004882813, "step": 84560 }, { "epoch": 0.682660252011979, "grad_norm": 0.9527645707130432, "learning_rate": 6.351057728811095e-06, "loss": 2.449718475341797, "step": 84570 }, { "epoch": 0.6827409733377461, "grad_norm": 0.6240437626838684, "learning_rate": 6.349442259072882e-06, "loss": 2.865001678466797, "step": 84580 }, { "epoch": 0.6828216946635132, "grad_norm": 0.8043046593666077, "learning_rate": 6.347826789334669e-06, "loss": 2.869332695007324, "step": 84590 }, { "epoch": 0.6829024159892803, "grad_norm": 0.7060052752494812, "learning_rate": 6.346211319596456e-06, "loss": 2.7230419158935546, "step": 84600 }, { "epoch": 0.6829831373150472, "grad_norm": 0.5710828304290771, "learning_rate": 6.3445958498582424e-06, "loss": 2.8561901092529296, "step": 84610 }, { "epoch": 0.6830638586408143, "grad_norm": 1.1452597379684448, "learning_rate": 6.342980380120029e-06, "loss": 2.6707250595092775, "step": 84620 }, { "epoch": 0.6831445799665814, "grad_norm": 0.9975919127464294, "learning_rate": 6.341364910381816e-06, "loss": 2.9778980255126952, "step": 84630 }, { "epoch": 0.6832253012923484, "grad_norm": 0.6539238095283508, "learning_rate": 6.339749440643603e-06, "loss": 2.417691230773926, "step": 84640 }, { "epoch": 0.6833060226181155, "grad_norm": 1.3084571361541748, "learning_rate": 6.33813397090539e-06, "loss": 3.0207672119140625, "step": 84650 }, { "epoch": 0.6833867439438825, "grad_norm": 1.5459309816360474, "learning_rate": 6.336518501167177e-06, "loss": 2.9502960205078126, "step": 84660 }, { "epoch": 0.6834674652696496, "grad_norm": 1.1015292406082153, "learning_rate": 6.3349030314289635e-06, "loss": 2.9130828857421873, "step": 84670 }, { "epoch": 0.6835481865954166, "grad_norm": 1.2944786548614502, "learning_rate": 6.33328756169075e-06, "loss": 3.156872367858887, "step": 84680 }, { "epoch": 0.6836289079211837, "grad_norm": 1.1034787893295288, "learning_rate": 6.331672091952537e-06, "loss": 2.404983901977539, "step": 84690 }, { "epoch": 0.6837096292469508, "grad_norm": 1.4235860109329224, "learning_rate": 6.330056622214325e-06, "loss": 2.5997724533081055, "step": 84700 }, { "epoch": 0.6837903505727178, "grad_norm": 1.2970486879348755, "learning_rate": 6.328441152476112e-06, "loss": 2.747502326965332, "step": 84710 }, { "epoch": 0.6838710718984848, "grad_norm": 0.9441324472427368, "learning_rate": 6.326825682737899e-06, "loss": 2.45556526184082, "step": 84720 }, { "epoch": 0.6839517932242519, "grad_norm": 1.1516536474227905, "learning_rate": 6.3252102129996854e-06, "loss": 2.8561079025268556, "step": 84730 }, { "epoch": 0.684032514550019, "grad_norm": 1.4195276498794556, "learning_rate": 6.323594743261472e-06, "loss": 2.5951709747314453, "step": 84740 }, { "epoch": 0.684113235875786, "grad_norm": 0.913642406463623, "learning_rate": 6.321979273523259e-06, "loss": 2.442063331604004, "step": 84750 }, { "epoch": 0.6841939572015531, "grad_norm": 0.742957353591919, "learning_rate": 6.320363803785046e-06, "loss": 2.844905471801758, "step": 84760 }, { "epoch": 0.6842746785273202, "grad_norm": 1.6146860122680664, "learning_rate": 6.318748334046833e-06, "loss": 2.7999887466430664, "step": 84770 }, { "epoch": 0.6843553998530871, "grad_norm": 0.975509524345398, "learning_rate": 6.31713286430862e-06, "loss": 2.8426513671875, "step": 84780 }, { "epoch": 0.6844361211788542, "grad_norm": 0.6401039361953735, "learning_rate": 6.3155173945704065e-06, "loss": 2.9738157272338865, "step": 84790 }, { "epoch": 0.6845168425046213, "grad_norm": 1.0280259847640991, "learning_rate": 6.313901924832193e-06, "loss": 2.6843795776367188, "step": 84800 }, { "epoch": 0.6845975638303884, "grad_norm": 0.7087679505348206, "learning_rate": 6.31228645509398e-06, "loss": 2.474871063232422, "step": 84810 }, { "epoch": 0.6846782851561554, "grad_norm": 0.6203760504722595, "learning_rate": 6.310670985355767e-06, "loss": 3.147511100769043, "step": 84820 }, { "epoch": 0.6847590064819224, "grad_norm": 1.1645724773406982, "learning_rate": 6.309055515617554e-06, "loss": 2.8409690856933594, "step": 84830 }, { "epoch": 0.6848397278076895, "grad_norm": 0.5022950768470764, "learning_rate": 6.307440045879341e-06, "loss": 2.475852394104004, "step": 84840 }, { "epoch": 0.6849204491334566, "grad_norm": 0.5505865216255188, "learning_rate": 6.305824576141128e-06, "loss": 2.795197296142578, "step": 84850 }, { "epoch": 0.6850011704592236, "grad_norm": 0.7904987931251526, "learning_rate": 6.3042091064029144e-06, "loss": 2.4106855392456055, "step": 84860 }, { "epoch": 0.6850818917849907, "grad_norm": 1.0630897283554077, "learning_rate": 6.302593636664701e-06, "loss": 2.4925952911376954, "step": 84870 }, { "epoch": 0.6851626131107578, "grad_norm": 0.8224812150001526, "learning_rate": 6.300978166926489e-06, "loss": 2.3421817779541017, "step": 84880 }, { "epoch": 0.6852433344365247, "grad_norm": 0.7997540831565857, "learning_rate": 6.299362697188276e-06, "loss": 3.0421493530273436, "step": 84890 }, { "epoch": 0.6853240557622918, "grad_norm": 0.9026281833648682, "learning_rate": 6.297747227450063e-06, "loss": 2.5055065155029297, "step": 84900 }, { "epoch": 0.6854047770880589, "grad_norm": 1.0768555402755737, "learning_rate": 6.2961317577118495e-06, "loss": 2.908219909667969, "step": 84910 }, { "epoch": 0.685485498413826, "grad_norm": 0.5911391973495483, "learning_rate": 6.294516287973636e-06, "loss": 2.5942781448364256, "step": 84920 }, { "epoch": 0.685566219739593, "grad_norm": 1.4301128387451172, "learning_rate": 6.292900818235423e-06, "loss": 2.40616397857666, "step": 84930 }, { "epoch": 0.6856469410653601, "grad_norm": 1.2885854244232178, "learning_rate": 6.29128534849721e-06, "loss": 2.445633316040039, "step": 84940 }, { "epoch": 0.6857276623911271, "grad_norm": 0.8602668046951294, "learning_rate": 6.289669878758997e-06, "loss": 2.4072975158691405, "step": 84950 }, { "epoch": 0.6858083837168941, "grad_norm": 0.6705795526504517, "learning_rate": 6.288054409020784e-06, "loss": 2.9446733474731444, "step": 84960 }, { "epoch": 0.6858891050426612, "grad_norm": 1.2877424955368042, "learning_rate": 6.286438939282571e-06, "loss": 2.3417158126831055, "step": 84970 }, { "epoch": 0.6859698263684283, "grad_norm": 0.9354695677757263, "learning_rate": 6.2848234695443574e-06, "loss": 2.5566474914550783, "step": 84980 }, { "epoch": 0.6860505476941954, "grad_norm": 0.9032930135726929, "learning_rate": 6.283207999806144e-06, "loss": 2.3353466033935546, "step": 84990 }, { "epoch": 0.6861312690199624, "grad_norm": 1.4070050716400146, "learning_rate": 6.281592530067931e-06, "loss": 2.4549491882324217, "step": 85000 }, { "epoch": 0.6862119903457294, "grad_norm": 0.8378775715827942, "learning_rate": 6.279977060329718e-06, "loss": 2.3586753845214843, "step": 85010 }, { "epoch": 0.6862927116714965, "grad_norm": 0.7113619446754456, "learning_rate": 6.278361590591505e-06, "loss": 2.5063243865966798, "step": 85020 }, { "epoch": 0.6863734329972635, "grad_norm": 0.8520200848579407, "learning_rate": 6.276746120853292e-06, "loss": 2.6206308364868165, "step": 85030 }, { "epoch": 0.6864541543230306, "grad_norm": 0.8871832489967346, "learning_rate": 6.2751306511150785e-06, "loss": 2.711198425292969, "step": 85040 }, { "epoch": 0.6865348756487977, "grad_norm": 0.9902923703193665, "learning_rate": 6.273515181376865e-06, "loss": 3.5576198577880858, "step": 85050 }, { "epoch": 0.6866155969745648, "grad_norm": 1.0519437789916992, "learning_rate": 6.271899711638653e-06, "loss": 2.5256219863891602, "step": 85060 }, { "epoch": 0.6866963183003317, "grad_norm": 0.6424862146377563, "learning_rate": 6.27028424190044e-06, "loss": 3.0458614349365236, "step": 85070 }, { "epoch": 0.6867770396260988, "grad_norm": 0.8729420900344849, "learning_rate": 6.268668772162227e-06, "loss": 2.768837738037109, "step": 85080 }, { "epoch": 0.6868577609518659, "grad_norm": 1.931232213973999, "learning_rate": 6.267053302424014e-06, "loss": 2.7442312240600586, "step": 85090 }, { "epoch": 0.6869384822776329, "grad_norm": 0.8091806769371033, "learning_rate": 6.2654378326858004e-06, "loss": 2.8652587890625, "step": 85100 }, { "epoch": 0.6870192036034, "grad_norm": 0.7670491933822632, "learning_rate": 6.263822362947587e-06, "loss": 2.6232332229614257, "step": 85110 }, { "epoch": 0.687099924929167, "grad_norm": 1.1641374826431274, "learning_rate": 6.262206893209374e-06, "loss": 2.8046064376831055, "step": 85120 }, { "epoch": 0.6871806462549341, "grad_norm": 0.742621123790741, "learning_rate": 6.260591423471161e-06, "loss": 2.470857620239258, "step": 85130 }, { "epoch": 0.6872613675807011, "grad_norm": 1.1663148403167725, "learning_rate": 6.258975953732948e-06, "loss": 3.192656707763672, "step": 85140 }, { "epoch": 0.6873420889064682, "grad_norm": 0.6890885829925537, "learning_rate": 6.257360483994735e-06, "loss": 2.6128307342529298, "step": 85150 }, { "epoch": 0.6874228102322353, "grad_norm": 0.8150679469108582, "learning_rate": 6.2557450142565215e-06, "loss": 2.4883749008178713, "step": 85160 }, { "epoch": 0.6875035315580023, "grad_norm": 1.2678003311157227, "learning_rate": 6.254129544518308e-06, "loss": 2.8517650604248046, "step": 85170 }, { "epoch": 0.6875842528837693, "grad_norm": 1.2404015064239502, "learning_rate": 6.252514074780095e-06, "loss": 2.622829627990723, "step": 85180 }, { "epoch": 0.6876649742095364, "grad_norm": 0.7870196104049683, "learning_rate": 6.250898605041882e-06, "loss": 2.2490467071533202, "step": 85190 }, { "epoch": 0.6877456955353035, "grad_norm": 1.2293843030929565, "learning_rate": 6.249283135303669e-06, "loss": 2.520673370361328, "step": 85200 }, { "epoch": 0.6878264168610705, "grad_norm": 0.8003518581390381, "learning_rate": 6.247667665565456e-06, "loss": 2.566790771484375, "step": 85210 }, { "epoch": 0.6879071381868376, "grad_norm": 1.0531363487243652, "learning_rate": 6.2460521958272426e-06, "loss": 2.6851690292358397, "step": 85220 }, { "epoch": 0.6879878595126047, "grad_norm": 0.9491792917251587, "learning_rate": 6.244436726089029e-06, "loss": 2.818344497680664, "step": 85230 }, { "epoch": 0.6880685808383716, "grad_norm": 0.9201355576515198, "learning_rate": 6.242821256350816e-06, "loss": 2.8513252258300783, "step": 85240 }, { "epoch": 0.6881493021641387, "grad_norm": 1.107900857925415, "learning_rate": 6.241205786612603e-06, "loss": 2.826185417175293, "step": 85250 }, { "epoch": 0.6882300234899058, "grad_norm": 0.7432565689086914, "learning_rate": 6.23959031687439e-06, "loss": 2.7171897888183594, "step": 85260 }, { "epoch": 0.6883107448156729, "grad_norm": 1.2615000009536743, "learning_rate": 6.237974847136177e-06, "loss": 2.2889944076538087, "step": 85270 }, { "epoch": 0.6883914661414399, "grad_norm": 2.734861373901367, "learning_rate": 6.236359377397964e-06, "loss": 2.6841596603393554, "step": 85280 }, { "epoch": 0.688472187467207, "grad_norm": 0.825867235660553, "learning_rate": 6.2347439076597505e-06, "loss": 2.5905378341674803, "step": 85290 }, { "epoch": 0.688552908792974, "grad_norm": 0.6625578999519348, "learning_rate": 6.233128437921537e-06, "loss": 2.744338798522949, "step": 85300 }, { "epoch": 0.6886336301187411, "grad_norm": 1.3417222499847412, "learning_rate": 6.231512968183324e-06, "loss": 2.8464223861694338, "step": 85310 }, { "epoch": 0.6887143514445081, "grad_norm": 1.4834553003311157, "learning_rate": 6.229897498445111e-06, "loss": 3.135731506347656, "step": 85320 }, { "epoch": 0.6887950727702752, "grad_norm": 1.2815709114074707, "learning_rate": 6.228282028706898e-06, "loss": 2.780867004394531, "step": 85330 }, { "epoch": 0.6888757940960423, "grad_norm": 1.0065767765045166, "learning_rate": 6.226666558968685e-06, "loss": 2.919144630432129, "step": 85340 }, { "epoch": 0.6889565154218092, "grad_norm": 1.2209559679031372, "learning_rate": 6.2250510892304716e-06, "loss": 2.6233457565307616, "step": 85350 }, { "epoch": 0.6890372367475763, "grad_norm": 1.3134669065475464, "learning_rate": 6.223435619492258e-06, "loss": 2.9081886291503904, "step": 85360 }, { "epoch": 0.6891179580733434, "grad_norm": 1.0251082181930542, "learning_rate": 6.221820149754045e-06, "loss": 2.7619916915893556, "step": 85370 }, { "epoch": 0.6891986793991105, "grad_norm": 1.1189806461334229, "learning_rate": 6.220204680015832e-06, "loss": 2.3821109771728515, "step": 85380 }, { "epoch": 0.6892794007248775, "grad_norm": 0.8969225287437439, "learning_rate": 6.218589210277619e-06, "loss": 2.5228641510009764, "step": 85390 }, { "epoch": 0.6893601220506446, "grad_norm": 0.9100530743598938, "learning_rate": 6.216973740539406e-06, "loss": 2.6940637588500977, "step": 85400 }, { "epoch": 0.6894408433764116, "grad_norm": 1.159777283668518, "learning_rate": 6.215358270801193e-06, "loss": 2.5962503433227537, "step": 85410 }, { "epoch": 0.6895215647021786, "grad_norm": 0.9195418357849121, "learning_rate": 6.2137428010629795e-06, "loss": 3.0728527069091798, "step": 85420 }, { "epoch": 0.6896022860279457, "grad_norm": 0.9250391721725464, "learning_rate": 6.212127331324766e-06, "loss": 2.610184097290039, "step": 85430 }, { "epoch": 0.6896830073537128, "grad_norm": 1.7767462730407715, "learning_rate": 6.210511861586553e-06, "loss": 2.5888544082641602, "step": 85440 }, { "epoch": 0.6897637286794799, "grad_norm": 1.1969029903411865, "learning_rate": 6.20889639184834e-06, "loss": 2.2995332717895507, "step": 85450 }, { "epoch": 0.6898444500052469, "grad_norm": 1.4665963649749756, "learning_rate": 6.207280922110127e-06, "loss": 2.692509651184082, "step": 85460 }, { "epoch": 0.6899251713310139, "grad_norm": 0.7782687544822693, "learning_rate": 6.205665452371914e-06, "loss": 2.3260047912597654, "step": 85470 }, { "epoch": 0.690005892656781, "grad_norm": 0.8851797580718994, "learning_rate": 6.2040499826337006e-06, "loss": 2.6296030044555665, "step": 85480 }, { "epoch": 0.690086613982548, "grad_norm": 1.440245270729065, "learning_rate": 6.202434512895487e-06, "loss": 2.6359352111816405, "step": 85490 }, { "epoch": 0.6901673353083151, "grad_norm": 1.1416012048721313, "learning_rate": 6.200819043157274e-06, "loss": 2.809164619445801, "step": 85500 }, { "epoch": 0.6902480566340822, "grad_norm": 1.7593194246292114, "learning_rate": 6.199203573419061e-06, "loss": 2.6719940185546873, "step": 85510 }, { "epoch": 0.6903287779598493, "grad_norm": 1.0314807891845703, "learning_rate": 6.197588103680848e-06, "loss": 2.578913116455078, "step": 85520 }, { "epoch": 0.6904094992856162, "grad_norm": 1.4688478708267212, "learning_rate": 6.195972633942635e-06, "loss": 2.592227554321289, "step": 85530 }, { "epoch": 0.6904902206113833, "grad_norm": 0.5844392776489258, "learning_rate": 6.194357164204422e-06, "loss": 2.9548627853393556, "step": 85540 }, { "epoch": 0.6905709419371504, "grad_norm": 0.9105039834976196, "learning_rate": 6.1927416944662085e-06, "loss": 2.4640487670898437, "step": 85550 }, { "epoch": 0.6906516632629174, "grad_norm": 0.7318166494369507, "learning_rate": 6.191126224727995e-06, "loss": 2.381887435913086, "step": 85560 }, { "epoch": 0.6907323845886845, "grad_norm": 0.6447053551673889, "learning_rate": 6.189510754989783e-06, "loss": 2.5872787475585937, "step": 85570 }, { "epoch": 0.6908131059144516, "grad_norm": 1.0654373168945312, "learning_rate": 6.18789528525157e-06, "loss": 2.895658493041992, "step": 85580 }, { "epoch": 0.6908938272402186, "grad_norm": 1.0980265140533447, "learning_rate": 6.186279815513357e-06, "loss": 2.8608264923095703, "step": 85590 }, { "epoch": 0.6909745485659856, "grad_norm": 2.149280071258545, "learning_rate": 6.1846643457751436e-06, "loss": 2.6878551483154296, "step": 85600 }, { "epoch": 0.6910552698917527, "grad_norm": 0.5547802448272705, "learning_rate": 6.18304887603693e-06, "loss": 2.8820470809936523, "step": 85610 }, { "epoch": 0.6911359912175198, "grad_norm": 1.2538836002349854, "learning_rate": 6.181433406298717e-06, "loss": 2.454410743713379, "step": 85620 }, { "epoch": 0.6912167125432868, "grad_norm": 0.8630930781364441, "learning_rate": 6.179817936560504e-06, "loss": 2.8044448852539063, "step": 85630 }, { "epoch": 0.6912974338690538, "grad_norm": 1.1035676002502441, "learning_rate": 6.178202466822291e-06, "loss": 2.5213451385498047, "step": 85640 }, { "epoch": 0.6913781551948209, "grad_norm": 0.8569425344467163, "learning_rate": 6.176586997084078e-06, "loss": 2.839984321594238, "step": 85650 }, { "epoch": 0.691458876520588, "grad_norm": 1.3242292404174805, "learning_rate": 6.174971527345865e-06, "loss": 2.653171730041504, "step": 85660 }, { "epoch": 0.691539597846355, "grad_norm": 0.8235015869140625, "learning_rate": 6.1733560576076515e-06, "loss": 2.510545349121094, "step": 85670 }, { "epoch": 0.6916203191721221, "grad_norm": 0.9267180562019348, "learning_rate": 6.171740587869438e-06, "loss": 2.5357135772705077, "step": 85680 }, { "epoch": 0.6917010404978892, "grad_norm": 0.9537495970726013, "learning_rate": 6.170125118131225e-06, "loss": 2.7003421783447266, "step": 85690 }, { "epoch": 0.6917817618236561, "grad_norm": 1.8418924808502197, "learning_rate": 6.168509648393012e-06, "loss": 2.8089776992797852, "step": 85700 }, { "epoch": 0.6918624831494232, "grad_norm": 1.0530425310134888, "learning_rate": 6.166894178654799e-06, "loss": 2.7652746200561524, "step": 85710 }, { "epoch": 0.6919432044751903, "grad_norm": 0.7472862601280212, "learning_rate": 6.165278708916586e-06, "loss": 2.578551483154297, "step": 85720 }, { "epoch": 0.6920239258009574, "grad_norm": 1.6078649759292603, "learning_rate": 6.1636632391783726e-06, "loss": 2.5812450408935548, "step": 85730 }, { "epoch": 0.6921046471267244, "grad_norm": 0.9626645445823669, "learning_rate": 6.162047769440159e-06, "loss": 2.8675424575805666, "step": 85740 }, { "epoch": 0.6921853684524915, "grad_norm": 1.0131139755249023, "learning_rate": 6.160432299701946e-06, "loss": 2.6957515716552733, "step": 85750 }, { "epoch": 0.6922660897782585, "grad_norm": 0.9498013854026794, "learning_rate": 6.158816829963733e-06, "loss": 2.916141128540039, "step": 85760 }, { "epoch": 0.6923468111040256, "grad_norm": 1.1377713680267334, "learning_rate": 6.15720136022552e-06, "loss": 2.8712467193603515, "step": 85770 }, { "epoch": 0.6924275324297926, "grad_norm": 0.765725314617157, "learning_rate": 6.155585890487307e-06, "loss": 2.6955198287963866, "step": 85780 }, { "epoch": 0.6925082537555597, "grad_norm": 1.042661190032959, "learning_rate": 6.153970420749094e-06, "loss": 2.872276496887207, "step": 85790 }, { "epoch": 0.6925889750813268, "grad_norm": 1.1504334211349487, "learning_rate": 6.1523549510108805e-06, "loss": 2.853282356262207, "step": 85800 }, { "epoch": 0.6926696964070937, "grad_norm": 1.1213288307189941, "learning_rate": 6.150739481272667e-06, "loss": 2.949026679992676, "step": 85810 }, { "epoch": 0.6927504177328608, "grad_norm": 0.9014081358909607, "learning_rate": 6.149124011534454e-06, "loss": 2.2981754302978517, "step": 85820 }, { "epoch": 0.6928311390586279, "grad_norm": 0.6867228746414185, "learning_rate": 6.147508541796241e-06, "loss": 2.554093360900879, "step": 85830 }, { "epoch": 0.692911860384395, "grad_norm": 0.7888578176498413, "learning_rate": 6.145893072058028e-06, "loss": 2.666652488708496, "step": 85840 }, { "epoch": 0.692992581710162, "grad_norm": 0.989102303981781, "learning_rate": 6.144277602319815e-06, "loss": 2.9502643585205077, "step": 85850 }, { "epoch": 0.6930733030359291, "grad_norm": 1.130990982055664, "learning_rate": 6.1426621325816015e-06, "loss": 2.71795597076416, "step": 85860 }, { "epoch": 0.6931540243616962, "grad_norm": 0.6622020602226257, "learning_rate": 6.141046662843388e-06, "loss": 2.7853193283081055, "step": 85870 }, { "epoch": 0.6932347456874631, "grad_norm": 1.3621946573257446, "learning_rate": 6.139431193105175e-06, "loss": 2.935290718078613, "step": 85880 }, { "epoch": 0.6933154670132302, "grad_norm": 1.7280722856521606, "learning_rate": 6.137815723366962e-06, "loss": 2.7363224029541016, "step": 85890 }, { "epoch": 0.6933961883389973, "grad_norm": 0.8688772916793823, "learning_rate": 6.136200253628749e-06, "loss": 2.7679021835327147, "step": 85900 }, { "epoch": 0.6934769096647644, "grad_norm": 0.6805923581123352, "learning_rate": 6.134584783890536e-06, "loss": 2.7228418350219727, "step": 85910 }, { "epoch": 0.6935576309905314, "grad_norm": 0.8545172214508057, "learning_rate": 6.132969314152323e-06, "loss": 3.2069477081298827, "step": 85920 }, { "epoch": 0.6936383523162984, "grad_norm": 0.9570186734199524, "learning_rate": 6.1313538444141095e-06, "loss": 2.770412254333496, "step": 85930 }, { "epoch": 0.6937190736420655, "grad_norm": 0.6500405073165894, "learning_rate": 6.129738374675896e-06, "loss": 2.5224090576171876, "step": 85940 }, { "epoch": 0.6937997949678325, "grad_norm": 0.8404064178466797, "learning_rate": 6.128122904937683e-06, "loss": 2.8722105026245117, "step": 85950 }, { "epoch": 0.6938805162935996, "grad_norm": 0.7445281744003296, "learning_rate": 6.12650743519947e-06, "loss": 2.6441675186157227, "step": 85960 }, { "epoch": 0.6939612376193667, "grad_norm": 0.6512880325317383, "learning_rate": 6.124891965461257e-06, "loss": 2.307447624206543, "step": 85970 }, { "epoch": 0.6940419589451338, "grad_norm": 0.904560923576355, "learning_rate": 6.123276495723044e-06, "loss": 2.684772491455078, "step": 85980 }, { "epoch": 0.6941226802709007, "grad_norm": 0.676793098449707, "learning_rate": 6.1216610259848305e-06, "loss": 2.790359306335449, "step": 85990 }, { "epoch": 0.6942034015966678, "grad_norm": 0.7276565432548523, "learning_rate": 6.120045556246617e-06, "loss": 2.45316219329834, "step": 86000 }, { "epoch": 0.6942841229224349, "grad_norm": 0.5917619466781616, "learning_rate": 6.118430086508404e-06, "loss": 2.2302629470825197, "step": 86010 }, { "epoch": 0.6943648442482019, "grad_norm": 1.3603579998016357, "learning_rate": 6.116814616770191e-06, "loss": 2.8837326049804686, "step": 86020 }, { "epoch": 0.694445565573969, "grad_norm": 0.8289100527763367, "learning_rate": 6.115199147031978e-06, "loss": 2.813032341003418, "step": 86030 }, { "epoch": 0.6945262868997361, "grad_norm": 0.8689718842506409, "learning_rate": 6.113583677293765e-06, "loss": 2.341672897338867, "step": 86040 }, { "epoch": 0.6946070082255031, "grad_norm": 0.7118547558784485, "learning_rate": 6.111968207555552e-06, "loss": 2.5722978591918944, "step": 86050 }, { "epoch": 0.6946877295512701, "grad_norm": 0.780957818031311, "learning_rate": 6.1103527378173385e-06, "loss": 3.007542037963867, "step": 86060 }, { "epoch": 0.6947684508770372, "grad_norm": 1.1307514905929565, "learning_rate": 6.108737268079125e-06, "loss": 2.489350700378418, "step": 86070 }, { "epoch": 0.6948491722028043, "grad_norm": 1.0697143077850342, "learning_rate": 6.107121798340912e-06, "loss": 2.9467092514038087, "step": 86080 }, { "epoch": 0.6949298935285713, "grad_norm": 1.0326184034347534, "learning_rate": 6.1055063286027e-06, "loss": 2.6212030410766602, "step": 86090 }, { "epoch": 0.6950106148543383, "grad_norm": 1.3603955507278442, "learning_rate": 6.103890858864487e-06, "loss": 2.766978454589844, "step": 86100 }, { "epoch": 0.6950913361801054, "grad_norm": 1.0576101541519165, "learning_rate": 6.1022753891262735e-06, "loss": 2.666978645324707, "step": 86110 }, { "epoch": 0.6951720575058725, "grad_norm": 0.8216370344161987, "learning_rate": 6.10065991938806e-06, "loss": 2.7555927276611327, "step": 86120 }, { "epoch": 0.6952527788316395, "grad_norm": 1.1544456481933594, "learning_rate": 6.099044449649848e-06, "loss": 2.7585689544677736, "step": 86130 }, { "epoch": 0.6953335001574066, "grad_norm": 1.4978047609329224, "learning_rate": 6.097428979911635e-06, "loss": 2.3614253997802734, "step": 86140 }, { "epoch": 0.6954142214831737, "grad_norm": 1.2607516050338745, "learning_rate": 6.095813510173422e-06, "loss": 2.554608154296875, "step": 86150 }, { "epoch": 0.6954949428089406, "grad_norm": 0.6699023842811584, "learning_rate": 6.094198040435209e-06, "loss": 2.1746700286865233, "step": 86160 }, { "epoch": 0.6955756641347077, "grad_norm": 1.353361964225769, "learning_rate": 6.0925825706969955e-06, "loss": 2.59647274017334, "step": 86170 }, { "epoch": 0.6956563854604748, "grad_norm": 1.447791576385498, "learning_rate": 6.090967100958782e-06, "loss": 2.869887351989746, "step": 86180 }, { "epoch": 0.6957371067862419, "grad_norm": 0.7349399924278259, "learning_rate": 6.089351631220569e-06, "loss": 2.879430389404297, "step": 86190 }, { "epoch": 0.6958178281120089, "grad_norm": 1.2542457580566406, "learning_rate": 6.087736161482356e-06, "loss": 2.270293045043945, "step": 86200 }, { "epoch": 0.695898549437776, "grad_norm": 0.907337486743927, "learning_rate": 6.086120691744143e-06, "loss": 2.7947433471679686, "step": 86210 }, { "epoch": 0.695979270763543, "grad_norm": 1.2193673849105835, "learning_rate": 6.08450522200593e-06, "loss": 2.8662384033203123, "step": 86220 }, { "epoch": 0.69605999208931, "grad_norm": 0.8608376979827881, "learning_rate": 6.0828897522677165e-06, "loss": 2.783049774169922, "step": 86230 }, { "epoch": 0.6961407134150771, "grad_norm": 0.7669339776039124, "learning_rate": 6.081274282529503e-06, "loss": 2.3267284393310548, "step": 86240 }, { "epoch": 0.6962214347408442, "grad_norm": 0.7720862030982971, "learning_rate": 6.07965881279129e-06, "loss": 2.7514509201049804, "step": 86250 }, { "epoch": 0.6963021560666113, "grad_norm": 0.8948829770088196, "learning_rate": 6.078043343053077e-06, "loss": 2.692584228515625, "step": 86260 }, { "epoch": 0.6963828773923783, "grad_norm": 1.4464483261108398, "learning_rate": 6.076427873314864e-06, "loss": 2.808034896850586, "step": 86270 }, { "epoch": 0.6964635987181453, "grad_norm": 0.8936949372291565, "learning_rate": 6.074812403576651e-06, "loss": 2.855031204223633, "step": 86280 }, { "epoch": 0.6965443200439124, "grad_norm": 0.9435817003250122, "learning_rate": 6.073196933838438e-06, "loss": 2.5305843353271484, "step": 86290 }, { "epoch": 0.6966250413696795, "grad_norm": 1.2184321880340576, "learning_rate": 6.0715814641002245e-06, "loss": 2.7008026123046873, "step": 86300 }, { "epoch": 0.6967057626954465, "grad_norm": 0.7243841886520386, "learning_rate": 6.069965994362011e-06, "loss": 2.564923095703125, "step": 86310 }, { "epoch": 0.6967864840212136, "grad_norm": 1.8375179767608643, "learning_rate": 6.068350524623798e-06, "loss": 2.954241943359375, "step": 86320 }, { "epoch": 0.6968672053469807, "grad_norm": 1.6480498313903809, "learning_rate": 6.066735054885585e-06, "loss": 2.6210958480834963, "step": 86330 }, { "epoch": 0.6969479266727476, "grad_norm": 2.2573864459991455, "learning_rate": 6.065119585147372e-06, "loss": 2.9371759414672853, "step": 86340 }, { "epoch": 0.6970286479985147, "grad_norm": 1.1285521984100342, "learning_rate": 6.063504115409159e-06, "loss": 2.421864318847656, "step": 86350 }, { "epoch": 0.6971093693242818, "grad_norm": 1.3794772624969482, "learning_rate": 6.0618886456709455e-06, "loss": 2.565459060668945, "step": 86360 }, { "epoch": 0.6971900906500489, "grad_norm": 0.7349769473075867, "learning_rate": 6.060273175932732e-06, "loss": 2.67879638671875, "step": 86370 }, { "epoch": 0.6972708119758159, "grad_norm": 1.0370959043502808, "learning_rate": 6.058657706194519e-06, "loss": 3.260462188720703, "step": 86380 }, { "epoch": 0.697351533301583, "grad_norm": 0.9629529714584351, "learning_rate": 6.057042236456306e-06, "loss": 2.6212818145751955, "step": 86390 }, { "epoch": 0.69743225462735, "grad_norm": 1.3025144338607788, "learning_rate": 6.055426766718093e-06, "loss": 2.726631736755371, "step": 86400 }, { "epoch": 0.697512975953117, "grad_norm": 1.0214976072311401, "learning_rate": 6.05381129697988e-06, "loss": 2.3508182525634767, "step": 86410 }, { "epoch": 0.6975936972788841, "grad_norm": 1.1230547428131104, "learning_rate": 6.052195827241667e-06, "loss": 2.5390838623046874, "step": 86420 }, { "epoch": 0.6976744186046512, "grad_norm": 0.8729099631309509, "learning_rate": 6.0505803575034534e-06, "loss": 2.483108711242676, "step": 86430 }, { "epoch": 0.6977551399304183, "grad_norm": 0.852624237537384, "learning_rate": 6.04896488776524e-06, "loss": 2.6734527587890624, "step": 86440 }, { "epoch": 0.6978358612561852, "grad_norm": 0.6021596789360046, "learning_rate": 6.047349418027028e-06, "loss": 2.6817672729492186, "step": 86450 }, { "epoch": 0.6979165825819523, "grad_norm": 1.0617542266845703, "learning_rate": 6.045733948288815e-06, "loss": 2.6951997756958006, "step": 86460 }, { "epoch": 0.6979973039077194, "grad_norm": 1.0802867412567139, "learning_rate": 6.044118478550602e-06, "loss": 2.6814483642578124, "step": 86470 }, { "epoch": 0.6980780252334864, "grad_norm": 0.8061081171035767, "learning_rate": 6.0425030088123885e-06, "loss": 2.461522674560547, "step": 86480 }, { "epoch": 0.6981587465592535, "grad_norm": 0.9195652604103088, "learning_rate": 6.040887539074175e-06, "loss": 2.3623140335083006, "step": 86490 }, { "epoch": 0.6982394678850206, "grad_norm": 1.103054404258728, "learning_rate": 6.039272069335962e-06, "loss": 2.402095413208008, "step": 86500 }, { "epoch": 0.6983201892107876, "grad_norm": 1.1355293989181519, "learning_rate": 6.037656599597749e-06, "loss": 2.763395309448242, "step": 86510 }, { "epoch": 0.6984009105365546, "grad_norm": 0.772994875907898, "learning_rate": 6.036041129859536e-06, "loss": 2.366904640197754, "step": 86520 }, { "epoch": 0.6984816318623217, "grad_norm": 0.9086382985115051, "learning_rate": 6.034425660121323e-06, "loss": 3.3459964752197267, "step": 86530 }, { "epoch": 0.6985623531880888, "grad_norm": 1.2406671047210693, "learning_rate": 6.03281019038311e-06, "loss": 2.973193550109863, "step": 86540 }, { "epoch": 0.6986430745138558, "grad_norm": 1.1705105304718018, "learning_rate": 6.0311947206448964e-06, "loss": 2.7598503112792967, "step": 86550 }, { "epoch": 0.6987237958396229, "grad_norm": 1.2783397436141968, "learning_rate": 6.029579250906683e-06, "loss": 2.5621307373046873, "step": 86560 }, { "epoch": 0.6988045171653899, "grad_norm": 1.20671546459198, "learning_rate": 6.02796378116847e-06, "loss": 2.513813781738281, "step": 86570 }, { "epoch": 0.698885238491157, "grad_norm": 1.032712697982788, "learning_rate": 6.026348311430257e-06, "loss": 2.4938602447509766, "step": 86580 }, { "epoch": 0.698965959816924, "grad_norm": 0.7634740471839905, "learning_rate": 6.024732841692044e-06, "loss": 2.734233283996582, "step": 86590 }, { "epoch": 0.6990466811426911, "grad_norm": 0.9254095554351807, "learning_rate": 6.023117371953831e-06, "loss": 2.329842376708984, "step": 86600 }, { "epoch": 0.6991274024684582, "grad_norm": 1.002601146697998, "learning_rate": 6.0215019022156175e-06, "loss": 2.4799381256103517, "step": 86610 }, { "epoch": 0.6992081237942251, "grad_norm": 1.1722581386566162, "learning_rate": 6.019886432477404e-06, "loss": 2.8047887802124025, "step": 86620 }, { "epoch": 0.6992888451199922, "grad_norm": 1.0654785633087158, "learning_rate": 6.018270962739191e-06, "loss": 2.638222503662109, "step": 86630 }, { "epoch": 0.6993695664457593, "grad_norm": 1.8642678260803223, "learning_rate": 6.016655493000978e-06, "loss": 2.8838579177856447, "step": 86640 }, { "epoch": 0.6994502877715264, "grad_norm": 0.6703441143035889, "learning_rate": 6.015040023262765e-06, "loss": 2.547301483154297, "step": 86650 }, { "epoch": 0.6995310090972934, "grad_norm": 0.8693261742591858, "learning_rate": 6.013424553524552e-06, "loss": 2.7680198669433596, "step": 86660 }, { "epoch": 0.6996117304230605, "grad_norm": 0.7289853096008301, "learning_rate": 6.011809083786339e-06, "loss": 2.3624290466308593, "step": 86670 }, { "epoch": 0.6996924517488275, "grad_norm": 0.6112805008888245, "learning_rate": 6.0101936140481254e-06, "loss": 2.831900787353516, "step": 86680 }, { "epoch": 0.6997731730745945, "grad_norm": 1.0500257015228271, "learning_rate": 6.008578144309912e-06, "loss": 3.046900177001953, "step": 86690 }, { "epoch": 0.6998538944003616, "grad_norm": 0.9434629678726196, "learning_rate": 6.006962674571699e-06, "loss": 2.319783401489258, "step": 86700 }, { "epoch": 0.6999346157261287, "grad_norm": 1.1424907445907593, "learning_rate": 6.005347204833486e-06, "loss": 2.693271827697754, "step": 86710 }, { "epoch": 0.7000153370518958, "grad_norm": 0.8909911513328552, "learning_rate": 6.003731735095273e-06, "loss": 2.924460792541504, "step": 86720 }, { "epoch": 0.7000960583776628, "grad_norm": 1.1632496118545532, "learning_rate": 6.00211626535706e-06, "loss": 2.907734680175781, "step": 86730 }, { "epoch": 0.7001767797034298, "grad_norm": 0.9443807601928711, "learning_rate": 6.0005007956188465e-06, "loss": 2.4492321014404297, "step": 86740 }, { "epoch": 0.7002575010291969, "grad_norm": 0.9739731550216675, "learning_rate": 5.998885325880633e-06, "loss": 2.5822988510131837, "step": 86750 }, { "epoch": 0.700338222354964, "grad_norm": 0.7762337327003479, "learning_rate": 5.99726985614242e-06, "loss": 2.4615453720092773, "step": 86760 }, { "epoch": 0.700418943680731, "grad_norm": 2.1519880294799805, "learning_rate": 5.995654386404207e-06, "loss": 2.507663345336914, "step": 86770 }, { "epoch": 0.7004996650064981, "grad_norm": 1.9922590255737305, "learning_rate": 5.994038916665994e-06, "loss": 3.0452619552612306, "step": 86780 }, { "epoch": 0.7005803863322652, "grad_norm": 0.5569024085998535, "learning_rate": 5.992423446927781e-06, "loss": 2.6055908203125, "step": 86790 }, { "epoch": 0.7006611076580321, "grad_norm": 1.4085818529129028, "learning_rate": 5.990807977189568e-06, "loss": 3.2395267486572266, "step": 86800 }, { "epoch": 0.7007418289837992, "grad_norm": 0.6981978416442871, "learning_rate": 5.9891925074513544e-06, "loss": 2.604118537902832, "step": 86810 }, { "epoch": 0.7008225503095663, "grad_norm": 0.6272255778312683, "learning_rate": 5.987577037713141e-06, "loss": 2.4094785690307616, "step": 86820 }, { "epoch": 0.7009032716353334, "grad_norm": 0.5807411670684814, "learning_rate": 5.985961567974928e-06, "loss": 2.105110740661621, "step": 86830 }, { "epoch": 0.7009839929611004, "grad_norm": 0.7022899389266968, "learning_rate": 5.984346098236715e-06, "loss": 2.556868553161621, "step": 86840 }, { "epoch": 0.7010647142868675, "grad_norm": 0.6952279210090637, "learning_rate": 5.982730628498502e-06, "loss": 2.5530975341796873, "step": 86850 }, { "epoch": 0.7011454356126345, "grad_norm": 0.9859820604324341, "learning_rate": 5.981115158760289e-06, "loss": 2.4344730377197266, "step": 86860 }, { "epoch": 0.7012261569384015, "grad_norm": 0.5193649530410767, "learning_rate": 5.9794996890220755e-06, "loss": 2.4241342544555664, "step": 86870 }, { "epoch": 0.7013068782641686, "grad_norm": 0.8983566761016846, "learning_rate": 5.977884219283862e-06, "loss": 2.765031433105469, "step": 86880 }, { "epoch": 0.7013875995899357, "grad_norm": 0.6435028910636902, "learning_rate": 5.976268749545649e-06, "loss": 2.757334899902344, "step": 86890 }, { "epoch": 0.7014683209157028, "grad_norm": 1.2351980209350586, "learning_rate": 5.974653279807436e-06, "loss": 2.7555238723754885, "step": 86900 }, { "epoch": 0.7015490422414697, "grad_norm": 0.9675329923629761, "learning_rate": 5.973037810069223e-06, "loss": 3.18812255859375, "step": 86910 }, { "epoch": 0.7016297635672368, "grad_norm": 2.3431246280670166, "learning_rate": 5.97142234033101e-06, "loss": 3.0047628402709963, "step": 86920 }, { "epoch": 0.7017104848930039, "grad_norm": 1.2856340408325195, "learning_rate": 5.969806870592797e-06, "loss": 2.38868408203125, "step": 86930 }, { "epoch": 0.7017912062187709, "grad_norm": 1.162122130393982, "learning_rate": 5.968191400854583e-06, "loss": 2.934014892578125, "step": 86940 }, { "epoch": 0.701871927544538, "grad_norm": 0.6983112692832947, "learning_rate": 5.96657593111637e-06, "loss": 3.201823425292969, "step": 86950 }, { "epoch": 0.7019526488703051, "grad_norm": 0.7152829766273499, "learning_rate": 5.964960461378158e-06, "loss": 3.021142578125, "step": 86960 }, { "epoch": 0.7020333701960721, "grad_norm": 1.4418838024139404, "learning_rate": 5.963344991639945e-06, "loss": 2.9359663009643553, "step": 86970 }, { "epoch": 0.7021140915218391, "grad_norm": 1.0165412425994873, "learning_rate": 5.961729521901732e-06, "loss": 2.7006059646606446, "step": 86980 }, { "epoch": 0.7021948128476062, "grad_norm": 0.956752598285675, "learning_rate": 5.9601140521635185e-06, "loss": 2.831376075744629, "step": 86990 }, { "epoch": 0.7022755341733733, "grad_norm": 1.1611477136611938, "learning_rate": 5.958498582425305e-06, "loss": 2.8259252548217773, "step": 87000 }, { "epoch": 0.7023562554991403, "grad_norm": 1.19822096824646, "learning_rate": 5.956883112687092e-06, "loss": 2.7512935638427733, "step": 87010 }, { "epoch": 0.7024369768249074, "grad_norm": 0.977872371673584, "learning_rate": 5.955267642948879e-06, "loss": 2.8240562438964845, "step": 87020 }, { "epoch": 0.7025176981506744, "grad_norm": 1.6550796031951904, "learning_rate": 5.953652173210666e-06, "loss": 2.417876625061035, "step": 87030 }, { "epoch": 0.7025984194764415, "grad_norm": 0.5865835547447205, "learning_rate": 5.952036703472453e-06, "loss": 2.398253631591797, "step": 87040 }, { "epoch": 0.7026791408022085, "grad_norm": 0.6770828366279602, "learning_rate": 5.9504212337342396e-06, "loss": 2.4428241729736326, "step": 87050 }, { "epoch": 0.7027598621279756, "grad_norm": 0.9898715615272522, "learning_rate": 5.948805763996026e-06, "loss": 3.0499319076538085, "step": 87060 }, { "epoch": 0.7028405834537427, "grad_norm": 0.6998144388198853, "learning_rate": 5.947190294257813e-06, "loss": 2.4694826126098635, "step": 87070 }, { "epoch": 0.7029213047795096, "grad_norm": 0.8528274297714233, "learning_rate": 5.9455748245196e-06, "loss": 2.8272960662841795, "step": 87080 }, { "epoch": 0.7030020261052767, "grad_norm": 1.1079306602478027, "learning_rate": 5.943959354781387e-06, "loss": 2.576265335083008, "step": 87090 }, { "epoch": 0.7030827474310438, "grad_norm": 0.5479142665863037, "learning_rate": 5.942343885043174e-06, "loss": 2.5508277893066404, "step": 87100 }, { "epoch": 0.7031634687568109, "grad_norm": 0.8745278716087341, "learning_rate": 5.940728415304961e-06, "loss": 2.3376708984375, "step": 87110 }, { "epoch": 0.7032441900825779, "grad_norm": 1.1986080408096313, "learning_rate": 5.9391129455667475e-06, "loss": 2.7194820404052735, "step": 87120 }, { "epoch": 0.703324911408345, "grad_norm": 1.1015739440917969, "learning_rate": 5.937497475828534e-06, "loss": 2.368779182434082, "step": 87130 }, { "epoch": 0.703405632734112, "grad_norm": 0.9091641306877136, "learning_rate": 5.935882006090321e-06, "loss": 2.20929069519043, "step": 87140 }, { "epoch": 0.703486354059879, "grad_norm": 1.1549534797668457, "learning_rate": 5.934266536352108e-06, "loss": 2.6690013885498045, "step": 87150 }, { "epoch": 0.7035670753856461, "grad_norm": 0.9841340780258179, "learning_rate": 5.932651066613895e-06, "loss": 2.6574310302734374, "step": 87160 }, { "epoch": 0.7036477967114132, "grad_norm": 1.060449242591858, "learning_rate": 5.931035596875682e-06, "loss": 2.3733074188232424, "step": 87170 }, { "epoch": 0.7037285180371803, "grad_norm": 1.3673810958862305, "learning_rate": 5.9294201271374686e-06, "loss": 3.3303733825683595, "step": 87180 }, { "epoch": 0.7038092393629473, "grad_norm": 1.1741271018981934, "learning_rate": 5.927804657399255e-06, "loss": 2.7327058792114256, "step": 87190 }, { "epoch": 0.7038899606887143, "grad_norm": 0.737981915473938, "learning_rate": 5.926189187661042e-06, "loss": 2.6894079208374024, "step": 87200 }, { "epoch": 0.7039706820144814, "grad_norm": 1.184753656387329, "learning_rate": 5.924573717922829e-06, "loss": 3.1836700439453125, "step": 87210 }, { "epoch": 0.7040514033402484, "grad_norm": 1.00259530544281, "learning_rate": 5.922958248184616e-06, "loss": 2.688376617431641, "step": 87220 }, { "epoch": 0.7041321246660155, "grad_norm": 1.2679753303527832, "learning_rate": 5.921342778446403e-06, "loss": 2.359165000915527, "step": 87230 }, { "epoch": 0.7042128459917826, "grad_norm": 0.8715248703956604, "learning_rate": 5.91972730870819e-06, "loss": 2.4327688217163086, "step": 87240 }, { "epoch": 0.7042935673175497, "grad_norm": 1.2218245267868042, "learning_rate": 5.9181118389699765e-06, "loss": 2.679463195800781, "step": 87250 }, { "epoch": 0.7043742886433166, "grad_norm": 0.6228392720222473, "learning_rate": 5.916496369231763e-06, "loss": 2.610919952392578, "step": 87260 }, { "epoch": 0.7044550099690837, "grad_norm": 0.9842358827590942, "learning_rate": 5.91488089949355e-06, "loss": 2.3383201599121093, "step": 87270 }, { "epoch": 0.7045357312948508, "grad_norm": 1.522276520729065, "learning_rate": 5.913265429755337e-06, "loss": 2.683806610107422, "step": 87280 }, { "epoch": 0.7046164526206179, "grad_norm": 0.8734308481216431, "learning_rate": 5.911649960017124e-06, "loss": 2.6818695068359375, "step": 87290 }, { "epoch": 0.7046971739463849, "grad_norm": 1.1041253805160522, "learning_rate": 5.910034490278911e-06, "loss": 2.346780776977539, "step": 87300 }, { "epoch": 0.704777895272152, "grad_norm": 0.838164746761322, "learning_rate": 5.9084190205406976e-06, "loss": 3.3046142578125, "step": 87310 }, { "epoch": 0.704858616597919, "grad_norm": 1.0630667209625244, "learning_rate": 5.906803550802484e-06, "loss": 2.3786026000976563, "step": 87320 }, { "epoch": 0.704939337923686, "grad_norm": 0.7767590284347534, "learning_rate": 5.905188081064271e-06, "loss": 2.2891305923461913, "step": 87330 }, { "epoch": 0.7050200592494531, "grad_norm": 1.3993072509765625, "learning_rate": 5.903572611326058e-06, "loss": 3.0438600540161134, "step": 87340 }, { "epoch": 0.7051007805752202, "grad_norm": 1.0715270042419434, "learning_rate": 5.901957141587845e-06, "loss": 2.3343467712402344, "step": 87350 }, { "epoch": 0.7051815019009873, "grad_norm": 1.8846523761749268, "learning_rate": 5.900341671849632e-06, "loss": 2.5970970153808595, "step": 87360 }, { "epoch": 0.7052622232267542, "grad_norm": 1.0663394927978516, "learning_rate": 5.898726202111419e-06, "loss": 3.1050907135009767, "step": 87370 }, { "epoch": 0.7053429445525213, "grad_norm": 1.000781536102295, "learning_rate": 5.8971107323732055e-06, "loss": 2.580815887451172, "step": 87380 }, { "epoch": 0.7054236658782884, "grad_norm": 0.9082821607589722, "learning_rate": 5.895495262634994e-06, "loss": 2.63082275390625, "step": 87390 }, { "epoch": 0.7055043872040554, "grad_norm": 0.8893555998802185, "learning_rate": 5.893879792896781e-06, "loss": 2.725233459472656, "step": 87400 }, { "epoch": 0.7055851085298225, "grad_norm": 1.1403355598449707, "learning_rate": 5.892264323158568e-06, "loss": 2.845384216308594, "step": 87410 }, { "epoch": 0.7056658298555896, "grad_norm": 0.8225805759429932, "learning_rate": 5.8906488534203546e-06, "loss": 2.8272666931152344, "step": 87420 }, { "epoch": 0.7057465511813567, "grad_norm": 0.5621907114982605, "learning_rate": 5.889033383682141e-06, "loss": 2.793089485168457, "step": 87430 }, { "epoch": 0.7058272725071236, "grad_norm": 1.003030776977539, "learning_rate": 5.887417913943928e-06, "loss": 2.3389049530029298, "step": 87440 }, { "epoch": 0.7059079938328907, "grad_norm": 0.8780812621116638, "learning_rate": 5.885802444205715e-06, "loss": 2.9668115615844726, "step": 87450 }, { "epoch": 0.7059887151586578, "grad_norm": 1.3174386024475098, "learning_rate": 5.884186974467502e-06, "loss": 2.6350286483764647, "step": 87460 }, { "epoch": 0.7060694364844248, "grad_norm": 0.7736964225769043, "learning_rate": 5.882571504729289e-06, "loss": 2.4375118255615233, "step": 87470 }, { "epoch": 0.7061501578101919, "grad_norm": 1.2741252183914185, "learning_rate": 5.880956034991076e-06, "loss": 2.5547971725463867, "step": 87480 }, { "epoch": 0.7062308791359589, "grad_norm": 0.7888036370277405, "learning_rate": 5.8793405652528625e-06, "loss": 2.504668617248535, "step": 87490 }, { "epoch": 0.706311600461726, "grad_norm": 0.9533234238624573, "learning_rate": 5.877725095514649e-06, "loss": 3.058028793334961, "step": 87500 }, { "epoch": 0.706392321787493, "grad_norm": 1.0694501399993896, "learning_rate": 5.876109625776436e-06, "loss": 3.0879100799560546, "step": 87510 }, { "epoch": 0.7064730431132601, "grad_norm": 0.5799717307090759, "learning_rate": 5.874494156038223e-06, "loss": 2.409296417236328, "step": 87520 }, { "epoch": 0.7065537644390272, "grad_norm": 1.7430695295333862, "learning_rate": 5.87287868630001e-06, "loss": 2.8120294570922852, "step": 87530 }, { "epoch": 0.7066344857647942, "grad_norm": 1.196587324142456, "learning_rate": 5.871263216561797e-06, "loss": 2.8939844131469727, "step": 87540 }, { "epoch": 0.7067152070905612, "grad_norm": 0.8848346471786499, "learning_rate": 5.8696477468235836e-06, "loss": 2.6874217987060547, "step": 87550 }, { "epoch": 0.7067959284163283, "grad_norm": 1.1105704307556152, "learning_rate": 5.86803227708537e-06, "loss": 2.7408966064453124, "step": 87560 }, { "epoch": 0.7068766497420954, "grad_norm": 1.1560313701629639, "learning_rate": 5.866416807347157e-06, "loss": 2.3311159133911135, "step": 87570 }, { "epoch": 0.7069573710678624, "grad_norm": 1.1266357898712158, "learning_rate": 5.864801337608944e-06, "loss": 2.7952075958251954, "step": 87580 }, { "epoch": 0.7070380923936295, "grad_norm": 0.741193413734436, "learning_rate": 5.863185867870731e-06, "loss": 2.881459426879883, "step": 87590 }, { "epoch": 0.7071188137193966, "grad_norm": 1.3291354179382324, "learning_rate": 5.861570398132518e-06, "loss": 2.5523445129394533, "step": 87600 }, { "epoch": 0.7071995350451635, "grad_norm": 0.9764088988304138, "learning_rate": 5.859954928394305e-06, "loss": 3.0231067657470705, "step": 87610 }, { "epoch": 0.7072802563709306, "grad_norm": 1.163407564163208, "learning_rate": 5.8583394586560915e-06, "loss": 3.0035804748535155, "step": 87620 }, { "epoch": 0.7073609776966977, "grad_norm": 1.0339267253875732, "learning_rate": 5.856723988917878e-06, "loss": 2.6786083221435546, "step": 87630 }, { "epoch": 0.7074416990224648, "grad_norm": 0.7129431366920471, "learning_rate": 5.855108519179665e-06, "loss": 2.7174964904785157, "step": 87640 }, { "epoch": 0.7075224203482318, "grad_norm": 1.4337477684020996, "learning_rate": 5.853493049441452e-06, "loss": 2.9152830123901365, "step": 87650 }, { "epoch": 0.7076031416739988, "grad_norm": 1.2482781410217285, "learning_rate": 5.851877579703239e-06, "loss": 2.5868627548217775, "step": 87660 }, { "epoch": 0.7076838629997659, "grad_norm": 0.7728487253189087, "learning_rate": 5.850262109965026e-06, "loss": 2.484926223754883, "step": 87670 }, { "epoch": 0.7077645843255329, "grad_norm": 0.8221992254257202, "learning_rate": 5.8486466402268125e-06, "loss": 2.3614694595336916, "step": 87680 }, { "epoch": 0.7078453056513, "grad_norm": 0.6791391968727112, "learning_rate": 5.847031170488599e-06, "loss": 2.6097787857055663, "step": 87690 }, { "epoch": 0.7079260269770671, "grad_norm": 1.04791259765625, "learning_rate": 5.845415700750386e-06, "loss": 2.5455863952636717, "step": 87700 }, { "epoch": 0.7080067483028342, "grad_norm": 0.9642658829689026, "learning_rate": 5.843800231012173e-06, "loss": 2.557194137573242, "step": 87710 }, { "epoch": 0.7080874696286011, "grad_norm": 0.6028994917869568, "learning_rate": 5.84218476127396e-06, "loss": 2.5370773315429687, "step": 87720 }, { "epoch": 0.7081681909543682, "grad_norm": 0.9249045848846436, "learning_rate": 5.840569291535747e-06, "loss": 2.5610073089599608, "step": 87730 }, { "epoch": 0.7082489122801353, "grad_norm": 1.089392900466919, "learning_rate": 5.838953821797534e-06, "loss": 2.687973213195801, "step": 87740 }, { "epoch": 0.7083296336059024, "grad_norm": 0.8267053365707397, "learning_rate": 5.8373383520593205e-06, "loss": 2.6256837844848633, "step": 87750 }, { "epoch": 0.7084103549316694, "grad_norm": 0.7922502160072327, "learning_rate": 5.835722882321107e-06, "loss": 2.6260730743408205, "step": 87760 }, { "epoch": 0.7084910762574365, "grad_norm": 0.7195611596107483, "learning_rate": 5.834107412582894e-06, "loss": 2.780453109741211, "step": 87770 }, { "epoch": 0.7085717975832035, "grad_norm": 0.7577419877052307, "learning_rate": 5.832491942844681e-06, "loss": 2.829986572265625, "step": 87780 }, { "epoch": 0.7086525189089705, "grad_norm": 1.3895516395568848, "learning_rate": 5.830876473106468e-06, "loss": 2.5757333755493166, "step": 87790 }, { "epoch": 0.7087332402347376, "grad_norm": 0.7768660187721252, "learning_rate": 5.829261003368255e-06, "loss": 2.564392852783203, "step": 87800 }, { "epoch": 0.7088139615605047, "grad_norm": 0.9405401945114136, "learning_rate": 5.8276455336300415e-06, "loss": 2.6652429580688475, "step": 87810 }, { "epoch": 0.7088946828862718, "grad_norm": 0.8874611258506775, "learning_rate": 5.826030063891828e-06, "loss": 2.530342674255371, "step": 87820 }, { "epoch": 0.7089754042120388, "grad_norm": 1.0384347438812256, "learning_rate": 5.824414594153615e-06, "loss": 2.642339897155762, "step": 87830 }, { "epoch": 0.7090561255378058, "grad_norm": 1.0835319757461548, "learning_rate": 5.822799124415403e-06, "loss": 2.364136505126953, "step": 87840 }, { "epoch": 0.7091368468635729, "grad_norm": 0.7878760099411011, "learning_rate": 5.82118365467719e-06, "loss": 2.9406843185424805, "step": 87850 }, { "epoch": 0.7092175681893399, "grad_norm": 0.7949305772781372, "learning_rate": 5.819568184938977e-06, "loss": 2.4867122650146483, "step": 87860 }, { "epoch": 0.709298289515107, "grad_norm": 0.9424113631248474, "learning_rate": 5.8179527152007635e-06, "loss": 2.6046348571777345, "step": 87870 }, { "epoch": 0.7093790108408741, "grad_norm": 1.0732884407043457, "learning_rate": 5.81633724546255e-06, "loss": 2.5537082672119142, "step": 87880 }, { "epoch": 0.7094597321666412, "grad_norm": 1.015398383140564, "learning_rate": 5.814721775724337e-06, "loss": 2.334391212463379, "step": 87890 }, { "epoch": 0.7095404534924081, "grad_norm": 0.954729437828064, "learning_rate": 5.813106305986124e-06, "loss": 2.6851518630981444, "step": 87900 }, { "epoch": 0.7096211748181752, "grad_norm": 0.8643413782119751, "learning_rate": 5.811490836247911e-06, "loss": 2.777009963989258, "step": 87910 }, { "epoch": 0.7097018961439423, "grad_norm": 1.2928236722946167, "learning_rate": 5.809875366509698e-06, "loss": 2.968012809753418, "step": 87920 }, { "epoch": 0.7097826174697093, "grad_norm": 0.9594327807426453, "learning_rate": 5.8082598967714845e-06, "loss": 2.4234855651855467, "step": 87930 }, { "epoch": 0.7098633387954764, "grad_norm": 0.7829413414001465, "learning_rate": 5.806644427033271e-06, "loss": 2.6237274169921876, "step": 87940 }, { "epoch": 0.7099440601212434, "grad_norm": 1.0474306344985962, "learning_rate": 5.805028957295058e-06, "loss": 2.67055606842041, "step": 87950 }, { "epoch": 0.7100247814470105, "grad_norm": 1.412259578704834, "learning_rate": 5.803413487556845e-06, "loss": 2.288652038574219, "step": 87960 }, { "epoch": 0.7101055027727775, "grad_norm": 1.0821824073791504, "learning_rate": 5.801798017818632e-06, "loss": 2.501452827453613, "step": 87970 }, { "epoch": 0.7101862240985446, "grad_norm": 0.8091712594032288, "learning_rate": 5.800182548080419e-06, "loss": 2.350822639465332, "step": 87980 }, { "epoch": 0.7102669454243117, "grad_norm": 1.1702734231948853, "learning_rate": 5.798567078342206e-06, "loss": 2.8182050704956056, "step": 87990 }, { "epoch": 0.7103476667500787, "grad_norm": 0.8007040023803711, "learning_rate": 5.7969516086039925e-06, "loss": 2.7103147506713867, "step": 88000 }, { "epoch": 0.7104283880758457, "grad_norm": 0.9941033720970154, "learning_rate": 5.795336138865779e-06, "loss": 2.427789878845215, "step": 88010 }, { "epoch": 0.7105091094016128, "grad_norm": 1.327558159828186, "learning_rate": 5.793720669127566e-06, "loss": 2.248866653442383, "step": 88020 }, { "epoch": 0.7105898307273799, "grad_norm": 1.1990834474563599, "learning_rate": 5.792105199389353e-06, "loss": 2.3856754302978516, "step": 88030 }, { "epoch": 0.7106705520531469, "grad_norm": 0.9641252756118774, "learning_rate": 5.79048972965114e-06, "loss": 2.4115325927734377, "step": 88040 }, { "epoch": 0.710751273378914, "grad_norm": 1.0219203233718872, "learning_rate": 5.788874259912927e-06, "loss": 2.471927452087402, "step": 88050 }, { "epoch": 0.7108319947046811, "grad_norm": 1.1063835620880127, "learning_rate": 5.7872587901747135e-06, "loss": 3.0489850997924806, "step": 88060 }, { "epoch": 0.710912716030448, "grad_norm": 0.9160643219947815, "learning_rate": 5.7856433204365e-06, "loss": 2.443130683898926, "step": 88070 }, { "epoch": 0.7109934373562151, "grad_norm": 0.5443029403686523, "learning_rate": 5.784027850698287e-06, "loss": 2.4689346313476563, "step": 88080 }, { "epoch": 0.7110741586819822, "grad_norm": 1.0382150411605835, "learning_rate": 5.782412380960074e-06, "loss": 2.485690689086914, "step": 88090 }, { "epoch": 0.7111548800077493, "grad_norm": 0.6561378836631775, "learning_rate": 5.780796911221861e-06, "loss": 2.5631858825683596, "step": 88100 }, { "epoch": 0.7112356013335163, "grad_norm": 1.3067768812179565, "learning_rate": 5.779181441483648e-06, "loss": 2.6256195068359376, "step": 88110 }, { "epoch": 0.7113163226592834, "grad_norm": 1.690407395362854, "learning_rate": 5.777565971745435e-06, "loss": 2.431760787963867, "step": 88120 }, { "epoch": 0.7113970439850504, "grad_norm": 1.2364633083343506, "learning_rate": 5.7759505020072214e-06, "loss": 2.7480758666992187, "step": 88130 }, { "epoch": 0.7114777653108174, "grad_norm": 0.8595041632652283, "learning_rate": 5.774335032269008e-06, "loss": 2.346774673461914, "step": 88140 }, { "epoch": 0.7115584866365845, "grad_norm": 1.4514760971069336, "learning_rate": 5.772719562530795e-06, "loss": 2.155211067199707, "step": 88150 }, { "epoch": 0.7116392079623516, "grad_norm": 1.6298420429229736, "learning_rate": 5.771104092792582e-06, "loss": 2.9052141189575194, "step": 88160 }, { "epoch": 0.7117199292881187, "grad_norm": 0.7746297121047974, "learning_rate": 5.769488623054369e-06, "loss": 2.316350555419922, "step": 88170 }, { "epoch": 0.7118006506138856, "grad_norm": 1.0705194473266602, "learning_rate": 5.767873153316156e-06, "loss": 2.462751579284668, "step": 88180 }, { "epoch": 0.7118813719396527, "grad_norm": 0.8911309242248535, "learning_rate": 5.7662576835779425e-06, "loss": 2.6957250595092774, "step": 88190 }, { "epoch": 0.7119620932654198, "grad_norm": 1.4178643226623535, "learning_rate": 5.764642213839729e-06, "loss": 2.6149127960205076, "step": 88200 }, { "epoch": 0.7120428145911869, "grad_norm": 0.7131596803665161, "learning_rate": 5.763026744101516e-06, "loss": 2.9774627685546875, "step": 88210 }, { "epoch": 0.7121235359169539, "grad_norm": 0.641554057598114, "learning_rate": 5.761411274363303e-06, "loss": 2.683846092224121, "step": 88220 }, { "epoch": 0.712204257242721, "grad_norm": 1.2354137897491455, "learning_rate": 5.75979580462509e-06, "loss": 3.001058006286621, "step": 88230 }, { "epoch": 0.712284978568488, "grad_norm": 0.717315137386322, "learning_rate": 5.758180334886877e-06, "loss": 2.6850351333618163, "step": 88240 }, { "epoch": 0.712365699894255, "grad_norm": 0.7443113327026367, "learning_rate": 5.756564865148664e-06, "loss": 2.9058250427246093, "step": 88250 }, { "epoch": 0.7124464212200221, "grad_norm": 1.2374440431594849, "learning_rate": 5.7549493954104504e-06, "loss": 2.4579599380493162, "step": 88260 }, { "epoch": 0.7125271425457892, "grad_norm": 0.936079204082489, "learning_rate": 5.753333925672237e-06, "loss": 2.6163089752197264, "step": 88270 }, { "epoch": 0.7126078638715563, "grad_norm": 1.2036504745483398, "learning_rate": 5.751718455934024e-06, "loss": 2.569720458984375, "step": 88280 }, { "epoch": 0.7126885851973233, "grad_norm": 0.6533269882202148, "learning_rate": 5.750102986195811e-06, "loss": 2.3473392486572267, "step": 88290 }, { "epoch": 0.7127693065230903, "grad_norm": 1.52134370803833, "learning_rate": 5.748487516457598e-06, "loss": 3.267996597290039, "step": 88300 }, { "epoch": 0.7128500278488574, "grad_norm": 1.0027467012405396, "learning_rate": 5.746872046719385e-06, "loss": 2.3521856307983398, "step": 88310 }, { "epoch": 0.7129307491746244, "grad_norm": 0.9105314612388611, "learning_rate": 5.7452565769811715e-06, "loss": 2.42053165435791, "step": 88320 }, { "epoch": 0.7130114705003915, "grad_norm": 1.0701836347579956, "learning_rate": 5.743641107242958e-06, "loss": 3.0042417526245115, "step": 88330 }, { "epoch": 0.7130921918261586, "grad_norm": 0.5878394842147827, "learning_rate": 5.742025637504745e-06, "loss": 2.9297100067138673, "step": 88340 }, { "epoch": 0.7131729131519257, "grad_norm": 0.9164203405380249, "learning_rate": 5.740410167766533e-06, "loss": 2.5940332412719727, "step": 88350 }, { "epoch": 0.7132536344776926, "grad_norm": 0.8608697056770325, "learning_rate": 5.73879469802832e-06, "loss": 2.682491493225098, "step": 88360 }, { "epoch": 0.7133343558034597, "grad_norm": 0.7867713570594788, "learning_rate": 5.737179228290107e-06, "loss": 2.301142120361328, "step": 88370 }, { "epoch": 0.7134150771292268, "grad_norm": 1.0881022214889526, "learning_rate": 5.7355637585518934e-06, "loss": 2.948496627807617, "step": 88380 }, { "epoch": 0.7134957984549938, "grad_norm": 0.9707879424095154, "learning_rate": 5.73394828881368e-06, "loss": 2.512673568725586, "step": 88390 }, { "epoch": 0.7135765197807609, "grad_norm": 0.9776231646537781, "learning_rate": 5.732332819075467e-06, "loss": 2.539811706542969, "step": 88400 }, { "epoch": 0.713657241106528, "grad_norm": 0.9806604981422424, "learning_rate": 5.730717349337254e-06, "loss": 3.3576263427734374, "step": 88410 }, { "epoch": 0.713737962432295, "grad_norm": 1.59064781665802, "learning_rate": 5.729101879599041e-06, "loss": 2.444476318359375, "step": 88420 }, { "epoch": 0.713818683758062, "grad_norm": 0.6718634963035583, "learning_rate": 5.727486409860828e-06, "loss": 2.4588518142700195, "step": 88430 }, { "epoch": 0.7138994050838291, "grad_norm": 0.6220231056213379, "learning_rate": 5.7258709401226145e-06, "loss": 3.0919599533081055, "step": 88440 }, { "epoch": 0.7139801264095962, "grad_norm": 1.3936930894851685, "learning_rate": 5.724255470384401e-06, "loss": 2.731406402587891, "step": 88450 }, { "epoch": 0.7140608477353632, "grad_norm": 0.8128446340560913, "learning_rate": 5.722640000646188e-06, "loss": 2.6624921798706054, "step": 88460 }, { "epoch": 0.7141415690611302, "grad_norm": 0.8430629968643188, "learning_rate": 5.721024530907975e-06, "loss": 2.4389678955078127, "step": 88470 }, { "epoch": 0.7142222903868973, "grad_norm": 0.8275231122970581, "learning_rate": 5.719409061169762e-06, "loss": 2.5461357116699217, "step": 88480 }, { "epoch": 0.7143030117126644, "grad_norm": 1.1582435369491577, "learning_rate": 5.717793591431549e-06, "loss": 2.8152233123779298, "step": 88490 }, { "epoch": 0.7143837330384314, "grad_norm": 1.0633492469787598, "learning_rate": 5.716178121693336e-06, "loss": 2.5498336791992187, "step": 88500 }, { "epoch": 0.7144644543641985, "grad_norm": 1.5647449493408203, "learning_rate": 5.7145626519551224e-06, "loss": 2.7507755279541017, "step": 88510 }, { "epoch": 0.7145451756899656, "grad_norm": 1.3703445196151733, "learning_rate": 5.712947182216909e-06, "loss": 2.2017988204956054, "step": 88520 }, { "epoch": 0.7146258970157325, "grad_norm": 1.4373722076416016, "learning_rate": 5.711331712478696e-06, "loss": 2.3074462890625, "step": 88530 }, { "epoch": 0.7147066183414996, "grad_norm": 1.528952717781067, "learning_rate": 5.709716242740483e-06, "loss": 2.656743621826172, "step": 88540 }, { "epoch": 0.7147873396672667, "grad_norm": 0.9457410573959351, "learning_rate": 5.70810077300227e-06, "loss": 2.8773567199707033, "step": 88550 }, { "epoch": 0.7148680609930338, "grad_norm": 0.7508544325828552, "learning_rate": 5.706485303264057e-06, "loss": 2.645831298828125, "step": 88560 }, { "epoch": 0.7149487823188008, "grad_norm": 0.7060186266899109, "learning_rate": 5.7048698335258435e-06, "loss": 2.672870063781738, "step": 88570 }, { "epoch": 0.7150295036445679, "grad_norm": 0.8669369220733643, "learning_rate": 5.70325436378763e-06, "loss": 2.582713508605957, "step": 88580 }, { "epoch": 0.7151102249703349, "grad_norm": 0.6291946172714233, "learning_rate": 5.701638894049417e-06, "loss": 2.460079574584961, "step": 88590 }, { "epoch": 0.7151909462961019, "grad_norm": 0.5849506855010986, "learning_rate": 5.700023424311204e-06, "loss": 2.0922052383422853, "step": 88600 }, { "epoch": 0.715271667621869, "grad_norm": 1.2769932746887207, "learning_rate": 5.698407954572991e-06, "loss": 3.1428083419799804, "step": 88610 }, { "epoch": 0.7153523889476361, "grad_norm": 1.0910969972610474, "learning_rate": 5.696792484834778e-06, "loss": 2.2456644058227537, "step": 88620 }, { "epoch": 0.7154331102734032, "grad_norm": 0.6631370186805725, "learning_rate": 5.695177015096565e-06, "loss": 2.4268056869506838, "step": 88630 }, { "epoch": 0.7155138315991701, "grad_norm": 1.8888543844223022, "learning_rate": 5.693561545358352e-06, "loss": 2.8637458801269533, "step": 88640 }, { "epoch": 0.7155945529249372, "grad_norm": 0.9790093302726746, "learning_rate": 5.691946075620139e-06, "loss": 2.619350624084473, "step": 88650 }, { "epoch": 0.7156752742507043, "grad_norm": 1.2199225425720215, "learning_rate": 5.690330605881926e-06, "loss": 2.8925453186035157, "step": 88660 }, { "epoch": 0.7157559955764713, "grad_norm": 0.8196404576301575, "learning_rate": 5.688715136143713e-06, "loss": 2.5422950744628907, "step": 88670 }, { "epoch": 0.7158367169022384, "grad_norm": 0.8785959482192993, "learning_rate": 5.6870996664055e-06, "loss": 2.6020198822021485, "step": 88680 }, { "epoch": 0.7159174382280055, "grad_norm": 0.8601054549217224, "learning_rate": 5.6854841966672865e-06, "loss": 2.7819774627685545, "step": 88690 }, { "epoch": 0.7159981595537726, "grad_norm": 1.5202178955078125, "learning_rate": 5.683868726929073e-06, "loss": 2.5206493377685546, "step": 88700 }, { "epoch": 0.7160788808795395, "grad_norm": 1.083269715309143, "learning_rate": 5.682253257190861e-06, "loss": 2.5292232513427733, "step": 88710 }, { "epoch": 0.7161596022053066, "grad_norm": 0.8423122763633728, "learning_rate": 5.680637787452648e-06, "loss": 2.5613849639892576, "step": 88720 }, { "epoch": 0.7162403235310737, "grad_norm": 0.9811248779296875, "learning_rate": 5.679022317714435e-06, "loss": 2.410505485534668, "step": 88730 }, { "epoch": 0.7163210448568408, "grad_norm": 0.9837154746055603, "learning_rate": 5.677406847976222e-06, "loss": 2.6782655715942383, "step": 88740 }, { "epoch": 0.7164017661826078, "grad_norm": 1.3378487825393677, "learning_rate": 5.675791378238008e-06, "loss": 2.816182518005371, "step": 88750 }, { "epoch": 0.7164824875083748, "grad_norm": 0.9039528369903564, "learning_rate": 5.674175908499795e-06, "loss": 2.89382381439209, "step": 88760 }, { "epoch": 0.7165632088341419, "grad_norm": 1.3214523792266846, "learning_rate": 5.672560438761582e-06, "loss": 2.6082468032836914, "step": 88770 }, { "epoch": 0.7166439301599089, "grad_norm": 1.0853222608566284, "learning_rate": 5.670944969023369e-06, "loss": 3.033101272583008, "step": 88780 }, { "epoch": 0.716724651485676, "grad_norm": 0.6821936368942261, "learning_rate": 5.669329499285156e-06, "loss": 2.1925674438476563, "step": 88790 }, { "epoch": 0.7168053728114431, "grad_norm": 0.9763902425765991, "learning_rate": 5.667714029546943e-06, "loss": 2.9426864624023437, "step": 88800 }, { "epoch": 0.7168860941372102, "grad_norm": 1.1618554592132568, "learning_rate": 5.6660985598087295e-06, "loss": 2.5733304977416993, "step": 88810 }, { "epoch": 0.7169668154629771, "grad_norm": 1.048413872718811, "learning_rate": 5.664483090070516e-06, "loss": 2.698221206665039, "step": 88820 }, { "epoch": 0.7170475367887442, "grad_norm": 0.6251826286315918, "learning_rate": 5.662867620332303e-06, "loss": 2.7885978698730467, "step": 88830 }, { "epoch": 0.7171282581145113, "grad_norm": 1.0399540662765503, "learning_rate": 5.66125215059409e-06, "loss": 2.823311996459961, "step": 88840 }, { "epoch": 0.7172089794402783, "grad_norm": 0.951225221157074, "learning_rate": 5.659636680855877e-06, "loss": 2.532509994506836, "step": 88850 }, { "epoch": 0.7172897007660454, "grad_norm": 0.6546611189842224, "learning_rate": 5.658021211117664e-06, "loss": 2.371795082092285, "step": 88860 }, { "epoch": 0.7173704220918125, "grad_norm": 0.8282282948493958, "learning_rate": 5.6564057413794506e-06, "loss": 2.8904756546020507, "step": 88870 }, { "epoch": 0.7174511434175795, "grad_norm": 0.773699164390564, "learning_rate": 5.654790271641237e-06, "loss": 2.504405212402344, "step": 88880 }, { "epoch": 0.7175318647433465, "grad_norm": 1.2037392854690552, "learning_rate": 5.653174801903024e-06, "loss": 2.7120113372802734, "step": 88890 }, { "epoch": 0.7176125860691136, "grad_norm": 0.8954760432243347, "learning_rate": 5.651559332164811e-06, "loss": 2.8857831954956055, "step": 88900 }, { "epoch": 0.7176933073948807, "grad_norm": 0.5980796813964844, "learning_rate": 5.649943862426598e-06, "loss": 2.64178581237793, "step": 88910 }, { "epoch": 0.7177740287206477, "grad_norm": 1.1034070253372192, "learning_rate": 5.648328392688385e-06, "loss": 2.2387920379638673, "step": 88920 }, { "epoch": 0.7178547500464147, "grad_norm": 1.2029937505722046, "learning_rate": 5.646712922950172e-06, "loss": 2.755601692199707, "step": 88930 }, { "epoch": 0.7179354713721818, "grad_norm": 0.9633546471595764, "learning_rate": 5.6450974532119585e-06, "loss": 2.215018463134766, "step": 88940 }, { "epoch": 0.7180161926979489, "grad_norm": 0.8544266819953918, "learning_rate": 5.643481983473745e-06, "loss": 2.548150062561035, "step": 88950 }, { "epoch": 0.7180969140237159, "grad_norm": 0.7385158538818359, "learning_rate": 5.641866513735532e-06, "loss": 2.752791976928711, "step": 88960 }, { "epoch": 0.718177635349483, "grad_norm": 0.8704546093940735, "learning_rate": 5.640251043997319e-06, "loss": 2.4544445037841798, "step": 88970 }, { "epoch": 0.7182583566752501, "grad_norm": 0.91986483335495, "learning_rate": 5.638635574259106e-06, "loss": 2.286062812805176, "step": 88980 }, { "epoch": 0.718339078001017, "grad_norm": 1.1041665077209473, "learning_rate": 5.637020104520893e-06, "loss": 3.0906721115112306, "step": 88990 }, { "epoch": 0.7184197993267841, "grad_norm": 1.0372012853622437, "learning_rate": 5.6354046347826796e-06, "loss": 2.840964508056641, "step": 89000 }, { "epoch": 0.7185005206525512, "grad_norm": 0.6477327346801758, "learning_rate": 5.633789165044466e-06, "loss": 2.403164291381836, "step": 89010 }, { "epoch": 0.7185812419783183, "grad_norm": 1.0766818523406982, "learning_rate": 5.632173695306253e-06, "loss": 2.590506935119629, "step": 89020 }, { "epoch": 0.7186619633040853, "grad_norm": 1.191774606704712, "learning_rate": 5.63055822556804e-06, "loss": 2.7077095031738283, "step": 89030 }, { "epoch": 0.7187426846298524, "grad_norm": 0.5416512489318848, "learning_rate": 5.628942755829827e-06, "loss": 2.8059412002563477, "step": 89040 }, { "epoch": 0.7188234059556194, "grad_norm": 1.5420364141464233, "learning_rate": 5.627327286091614e-06, "loss": 2.6463598251342773, "step": 89050 }, { "epoch": 0.7189041272813864, "grad_norm": 0.8880930542945862, "learning_rate": 5.625711816353401e-06, "loss": 2.7167732238769533, "step": 89060 }, { "epoch": 0.7189848486071535, "grad_norm": 0.9321022629737854, "learning_rate": 5.6240963466151875e-06, "loss": 2.528875732421875, "step": 89070 }, { "epoch": 0.7190655699329206, "grad_norm": 0.786516547203064, "learning_rate": 5.622480876876974e-06, "loss": 2.8113256454467774, "step": 89080 }, { "epoch": 0.7191462912586877, "grad_norm": 1.033437728881836, "learning_rate": 5.620865407138761e-06, "loss": 2.5501670837402344, "step": 89090 }, { "epoch": 0.7192270125844547, "grad_norm": 0.6621100306510925, "learning_rate": 5.619249937400548e-06, "loss": 2.6592119216918944, "step": 89100 }, { "epoch": 0.7193077339102217, "grad_norm": 1.0407675504684448, "learning_rate": 5.617634467662335e-06, "loss": 2.672176170349121, "step": 89110 }, { "epoch": 0.7193884552359888, "grad_norm": 1.3565481901168823, "learning_rate": 5.616018997924122e-06, "loss": 2.5678678512573243, "step": 89120 }, { "epoch": 0.7194691765617558, "grad_norm": 0.831432580947876, "learning_rate": 5.6144035281859086e-06, "loss": 2.320631408691406, "step": 89130 }, { "epoch": 0.7195498978875229, "grad_norm": 1.7987608909606934, "learning_rate": 5.612788058447695e-06, "loss": 2.7554834365844725, "step": 89140 }, { "epoch": 0.71963061921329, "grad_norm": 1.4285995960235596, "learning_rate": 5.611172588709482e-06, "loss": 2.9030860900878905, "step": 89150 }, { "epoch": 0.719711340539057, "grad_norm": 1.0052578449249268, "learning_rate": 5.609557118971269e-06, "loss": 2.880017852783203, "step": 89160 }, { "epoch": 0.719792061864824, "grad_norm": 0.9069820046424866, "learning_rate": 5.607941649233056e-06, "loss": 2.514841079711914, "step": 89170 }, { "epoch": 0.7198727831905911, "grad_norm": 0.8234655261039734, "learning_rate": 5.606326179494843e-06, "loss": 2.2607688903808594, "step": 89180 }, { "epoch": 0.7199535045163582, "grad_norm": 1.2968984842300415, "learning_rate": 5.60471070975663e-06, "loss": 2.5511781692504885, "step": 89190 }, { "epoch": 0.7200342258421253, "grad_norm": 1.246629238128662, "learning_rate": 5.6030952400184165e-06, "loss": 2.9867156982421874, "step": 89200 }, { "epoch": 0.7201149471678923, "grad_norm": 1.9335963726043701, "learning_rate": 5.601479770280203e-06, "loss": 3.2176036834716797, "step": 89210 }, { "epoch": 0.7201956684936593, "grad_norm": 1.1844135522842407, "learning_rate": 5.599864300541991e-06, "loss": 2.604095458984375, "step": 89220 }, { "epoch": 0.7202763898194264, "grad_norm": 0.9091318249702454, "learning_rate": 5.598248830803778e-06, "loss": 2.8598426818847655, "step": 89230 }, { "epoch": 0.7203571111451934, "grad_norm": 0.7878142595291138, "learning_rate": 5.596633361065565e-06, "loss": 2.70446662902832, "step": 89240 }, { "epoch": 0.7204378324709605, "grad_norm": 0.9332827925682068, "learning_rate": 5.5950178913273516e-06, "loss": 2.559681701660156, "step": 89250 }, { "epoch": 0.7205185537967276, "grad_norm": 0.7855387330055237, "learning_rate": 5.593402421589138e-06, "loss": 3.22656135559082, "step": 89260 }, { "epoch": 0.7205992751224947, "grad_norm": 0.9878302812576294, "learning_rate": 5.591786951850925e-06, "loss": 2.6185850143432616, "step": 89270 }, { "epoch": 0.7206799964482616, "grad_norm": 0.9829348921775818, "learning_rate": 5.590171482112712e-06, "loss": 2.2829574584960937, "step": 89280 }, { "epoch": 0.7207607177740287, "grad_norm": 1.0859702825546265, "learning_rate": 5.588556012374499e-06, "loss": 2.781391906738281, "step": 89290 }, { "epoch": 0.7208414390997958, "grad_norm": 0.8473849296569824, "learning_rate": 5.586940542636286e-06, "loss": 2.6408048629760743, "step": 89300 }, { "epoch": 0.7209221604255628, "grad_norm": 0.8772688508033752, "learning_rate": 5.585325072898073e-06, "loss": 2.8420427322387694, "step": 89310 }, { "epoch": 0.7210028817513299, "grad_norm": 0.9289820790290833, "learning_rate": 5.5837096031598595e-06, "loss": 2.971653175354004, "step": 89320 }, { "epoch": 0.721083603077097, "grad_norm": 1.2188549041748047, "learning_rate": 5.582094133421646e-06, "loss": 3.1016427993774416, "step": 89330 }, { "epoch": 0.721164324402864, "grad_norm": 0.7283843755722046, "learning_rate": 5.580478663683433e-06, "loss": 2.796090316772461, "step": 89340 }, { "epoch": 0.721245045728631, "grad_norm": 0.9421164393424988, "learning_rate": 5.57886319394522e-06, "loss": 2.6311960220336914, "step": 89350 }, { "epoch": 0.7213257670543981, "grad_norm": 0.8199821710586548, "learning_rate": 5.577247724207007e-06, "loss": 2.3626407623291015, "step": 89360 }, { "epoch": 0.7214064883801652, "grad_norm": 0.7703748345375061, "learning_rate": 5.575632254468794e-06, "loss": 2.6998565673828123, "step": 89370 }, { "epoch": 0.7214872097059322, "grad_norm": 0.6608052849769592, "learning_rate": 5.5740167847305805e-06, "loss": 3.289327621459961, "step": 89380 }, { "epoch": 0.7215679310316993, "grad_norm": 0.7522531747817993, "learning_rate": 5.572401314992367e-06, "loss": 2.877022552490234, "step": 89390 }, { "epoch": 0.7216486523574663, "grad_norm": 0.6738505959510803, "learning_rate": 5.570785845254154e-06, "loss": 2.349333953857422, "step": 89400 }, { "epoch": 0.7217293736832334, "grad_norm": 0.8104575872421265, "learning_rate": 5.569170375515941e-06, "loss": 2.827528190612793, "step": 89410 }, { "epoch": 0.7218100950090004, "grad_norm": 1.243633508682251, "learning_rate": 5.567554905777728e-06, "loss": 3.0808650970458986, "step": 89420 }, { "epoch": 0.7218908163347675, "grad_norm": 0.9678273797035217, "learning_rate": 5.565939436039515e-06, "loss": 2.7355958938598635, "step": 89430 }, { "epoch": 0.7219715376605346, "grad_norm": 1.1092262268066406, "learning_rate": 5.564323966301302e-06, "loss": 2.4159507751464844, "step": 89440 }, { "epoch": 0.7220522589863015, "grad_norm": 1.4311336278915405, "learning_rate": 5.5627084965630885e-06, "loss": 2.803165817260742, "step": 89450 }, { "epoch": 0.7221329803120686, "grad_norm": 1.081972599029541, "learning_rate": 5.561093026824875e-06, "loss": 2.3722003936767577, "step": 89460 }, { "epoch": 0.7222137016378357, "grad_norm": 1.5058891773223877, "learning_rate": 5.559477557086662e-06, "loss": 2.666974639892578, "step": 89470 }, { "epoch": 0.7222944229636028, "grad_norm": 0.9115182161331177, "learning_rate": 5.557862087348449e-06, "loss": 2.4925159454345702, "step": 89480 }, { "epoch": 0.7223751442893698, "grad_norm": 1.9042702913284302, "learning_rate": 5.556246617610236e-06, "loss": 3.0719511032104494, "step": 89490 }, { "epoch": 0.7224558656151369, "grad_norm": 0.6100035905838013, "learning_rate": 5.554631147872023e-06, "loss": 2.4658348083496096, "step": 89500 }, { "epoch": 0.722536586940904, "grad_norm": 0.8855219483375549, "learning_rate": 5.5530156781338095e-06, "loss": 2.7632408142089844, "step": 89510 }, { "epoch": 0.7226173082666709, "grad_norm": 1.3269199132919312, "learning_rate": 5.551400208395596e-06, "loss": 2.421392059326172, "step": 89520 }, { "epoch": 0.722698029592438, "grad_norm": 0.8995566368103027, "learning_rate": 5.549784738657383e-06, "loss": 2.7698856353759767, "step": 89530 }, { "epoch": 0.7227787509182051, "grad_norm": 0.9444013237953186, "learning_rate": 5.54816926891917e-06, "loss": 2.4792755126953123, "step": 89540 }, { "epoch": 0.7228594722439722, "grad_norm": 1.0726768970489502, "learning_rate": 5.546553799180957e-06, "loss": 2.3984354019165037, "step": 89550 }, { "epoch": 0.7229401935697392, "grad_norm": 1.320233941078186, "learning_rate": 5.544938329442744e-06, "loss": 2.4538415908813476, "step": 89560 }, { "epoch": 0.7230209148955062, "grad_norm": 0.8544309139251709, "learning_rate": 5.543322859704531e-06, "loss": 2.7120269775390624, "step": 89570 }, { "epoch": 0.7231016362212733, "grad_norm": 0.9417265057563782, "learning_rate": 5.5417073899663175e-06, "loss": 2.428538513183594, "step": 89580 }, { "epoch": 0.7231823575470403, "grad_norm": 1.0821521282196045, "learning_rate": 5.540091920228104e-06, "loss": 2.4006465911865233, "step": 89590 }, { "epoch": 0.7232630788728074, "grad_norm": 0.7714840769767761, "learning_rate": 5.538476450489891e-06, "loss": 2.568142127990723, "step": 89600 }, { "epoch": 0.7233438001985745, "grad_norm": 1.3220373392105103, "learning_rate": 5.536860980751678e-06, "loss": 2.749978256225586, "step": 89610 }, { "epoch": 0.7234245215243416, "grad_norm": 0.7919031381607056, "learning_rate": 5.535245511013465e-06, "loss": 2.599662208557129, "step": 89620 }, { "epoch": 0.7235052428501085, "grad_norm": 0.835625410079956, "learning_rate": 5.533630041275252e-06, "loss": 2.737148666381836, "step": 89630 }, { "epoch": 0.7235859641758756, "grad_norm": 0.8876383304595947, "learning_rate": 5.5320145715370385e-06, "loss": 2.837147521972656, "step": 89640 }, { "epoch": 0.7236666855016427, "grad_norm": 1.0525250434875488, "learning_rate": 5.530399101798825e-06, "loss": 2.7612894058227537, "step": 89650 }, { "epoch": 0.7237474068274097, "grad_norm": 0.9527391195297241, "learning_rate": 5.528783632060612e-06, "loss": 2.42102108001709, "step": 89660 }, { "epoch": 0.7238281281531768, "grad_norm": 0.9415568113327026, "learning_rate": 5.527168162322399e-06, "loss": 2.976044464111328, "step": 89670 }, { "epoch": 0.7239088494789438, "grad_norm": 0.7666152715682983, "learning_rate": 5.525552692584186e-06, "loss": 2.6345457077026366, "step": 89680 }, { "epoch": 0.7239895708047109, "grad_norm": 0.897792398929596, "learning_rate": 5.523937222845973e-06, "loss": 2.7355117797851562, "step": 89690 }, { "epoch": 0.7240702921304779, "grad_norm": 0.866601824760437, "learning_rate": 5.52232175310776e-06, "loss": 2.5913572311401367, "step": 89700 }, { "epoch": 0.724151013456245, "grad_norm": 0.8845425248146057, "learning_rate": 5.5207062833695465e-06, "loss": 2.4988279342651367, "step": 89710 }, { "epoch": 0.7242317347820121, "grad_norm": 0.563584566116333, "learning_rate": 5.519090813631333e-06, "loss": 2.5751016616821287, "step": 89720 }, { "epoch": 0.7243124561077792, "grad_norm": 1.1331324577331543, "learning_rate": 5.51747534389312e-06, "loss": 2.7932653427124023, "step": 89730 }, { "epoch": 0.7243931774335461, "grad_norm": 1.1672124862670898, "learning_rate": 5.515859874154908e-06, "loss": 3.0921100616455077, "step": 89740 }, { "epoch": 0.7244738987593132, "grad_norm": 1.1941487789154053, "learning_rate": 5.514244404416695e-06, "loss": 2.5404407501220705, "step": 89750 }, { "epoch": 0.7245546200850803, "grad_norm": 0.5498613119125366, "learning_rate": 5.5126289346784815e-06, "loss": 2.6271263122558595, "step": 89760 }, { "epoch": 0.7246353414108473, "grad_norm": 1.4348310232162476, "learning_rate": 5.511013464940268e-06, "loss": 2.299402046203613, "step": 89770 }, { "epoch": 0.7247160627366144, "grad_norm": 0.588138997554779, "learning_rate": 5.509397995202055e-06, "loss": 2.672402191162109, "step": 89780 }, { "epoch": 0.7247967840623815, "grad_norm": 1.0955297946929932, "learning_rate": 5.507782525463842e-06, "loss": 3.139729309082031, "step": 89790 }, { "epoch": 0.7248775053881485, "grad_norm": 0.961874783039093, "learning_rate": 5.506167055725629e-06, "loss": 2.8329740524291993, "step": 89800 }, { "epoch": 0.7249582267139155, "grad_norm": 0.7686543464660645, "learning_rate": 5.504551585987416e-06, "loss": 2.770022964477539, "step": 89810 }, { "epoch": 0.7250389480396826, "grad_norm": 0.8962779641151428, "learning_rate": 5.502936116249203e-06, "loss": 2.9897199630737306, "step": 89820 }, { "epoch": 0.7251196693654497, "grad_norm": 0.6036266088485718, "learning_rate": 5.5013206465109895e-06, "loss": 2.561771774291992, "step": 89830 }, { "epoch": 0.7252003906912167, "grad_norm": 1.0046002864837646, "learning_rate": 5.499705176772776e-06, "loss": 2.710736083984375, "step": 89840 }, { "epoch": 0.7252811120169838, "grad_norm": 0.6133022308349609, "learning_rate": 5.498089707034563e-06, "loss": 2.374476432800293, "step": 89850 }, { "epoch": 0.7253618333427508, "grad_norm": 0.6934454441070557, "learning_rate": 5.49647423729635e-06, "loss": 2.48062744140625, "step": 89860 }, { "epoch": 0.7254425546685179, "grad_norm": 0.7294753789901733, "learning_rate": 5.494858767558137e-06, "loss": 2.5012054443359375, "step": 89870 }, { "epoch": 0.7255232759942849, "grad_norm": 0.916399359703064, "learning_rate": 5.493243297819924e-06, "loss": 2.5517337799072264, "step": 89880 }, { "epoch": 0.725603997320052, "grad_norm": 0.6288617253303528, "learning_rate": 5.4916278280817105e-06, "loss": 2.4276805877685548, "step": 89890 }, { "epoch": 0.7256847186458191, "grad_norm": 0.8574035167694092, "learning_rate": 5.490012358343498e-06, "loss": 2.547925567626953, "step": 89900 }, { "epoch": 0.725765439971586, "grad_norm": 0.9720122814178467, "learning_rate": 5.488396888605285e-06, "loss": 2.3550542831420898, "step": 89910 }, { "epoch": 0.7258461612973531, "grad_norm": 0.6986315250396729, "learning_rate": 5.486781418867072e-06, "loss": 2.5957866668701173, "step": 89920 }, { "epoch": 0.7259268826231202, "grad_norm": 0.9746367931365967, "learning_rate": 5.485165949128859e-06, "loss": 2.6065149307250977, "step": 89930 }, { "epoch": 0.7260076039488873, "grad_norm": 0.9915945529937744, "learning_rate": 5.483550479390646e-06, "loss": 2.552303504943848, "step": 89940 }, { "epoch": 0.7260883252746543, "grad_norm": 0.7334086894989014, "learning_rate": 5.4819350096524324e-06, "loss": 2.4001306533813476, "step": 89950 }, { "epoch": 0.7261690466004214, "grad_norm": 0.45947548747062683, "learning_rate": 5.480319539914219e-06, "loss": 2.882572364807129, "step": 89960 }, { "epoch": 0.7262497679261884, "grad_norm": 0.9392883777618408, "learning_rate": 5.478704070176006e-06, "loss": 2.7339181900024414, "step": 89970 }, { "epoch": 0.7263304892519554, "grad_norm": 0.7469055652618408, "learning_rate": 5.477088600437793e-06, "loss": 2.587372398376465, "step": 89980 }, { "epoch": 0.7264112105777225, "grad_norm": 1.1922026872634888, "learning_rate": 5.47547313069958e-06, "loss": 2.2107145309448244, "step": 89990 }, { "epoch": 0.7264919319034896, "grad_norm": 1.702958583831787, "learning_rate": 5.473857660961367e-06, "loss": 2.249578857421875, "step": 90000 }, { "epoch": 0.7265726532292567, "grad_norm": 1.4125267267227173, "learning_rate": 5.4722421912231535e-06, "loss": 2.9886661529541017, "step": 90010 }, { "epoch": 0.7266533745550237, "grad_norm": 0.667312502861023, "learning_rate": 5.47062672148494e-06, "loss": 2.457560729980469, "step": 90020 }, { "epoch": 0.7267340958807907, "grad_norm": 0.6116471886634827, "learning_rate": 5.469011251746727e-06, "loss": 2.414993095397949, "step": 90030 }, { "epoch": 0.7268148172065578, "grad_norm": 0.7000021934509277, "learning_rate": 5.467395782008514e-06, "loss": 2.800440788269043, "step": 90040 }, { "epoch": 0.7268955385323248, "grad_norm": 0.8755921721458435, "learning_rate": 5.465780312270301e-06, "loss": 2.3809799194335937, "step": 90050 }, { "epoch": 0.7269762598580919, "grad_norm": 0.6477211117744446, "learning_rate": 5.464164842532088e-06, "loss": 2.40318603515625, "step": 90060 }, { "epoch": 0.727056981183859, "grad_norm": 0.8287198543548584, "learning_rate": 5.462549372793875e-06, "loss": 2.3738130569458007, "step": 90070 }, { "epoch": 0.7271377025096261, "grad_norm": 1.3208400011062622, "learning_rate": 5.4609339030556614e-06, "loss": 2.8520254135131835, "step": 90080 }, { "epoch": 0.727218423835393, "grad_norm": 1.0055047273635864, "learning_rate": 5.459318433317448e-06, "loss": 2.5985185623168947, "step": 90090 }, { "epoch": 0.7272991451611601, "grad_norm": 1.3695919513702393, "learning_rate": 5.457702963579236e-06, "loss": 2.1287246704101563, "step": 90100 }, { "epoch": 0.7273798664869272, "grad_norm": 0.7490389943122864, "learning_rate": 5.456087493841023e-06, "loss": 2.3761724472045898, "step": 90110 }, { "epoch": 0.7274605878126942, "grad_norm": 1.1628488302230835, "learning_rate": 5.45447202410281e-06, "loss": 2.6339622497558595, "step": 90120 }, { "epoch": 0.7275413091384613, "grad_norm": 1.2343223094940186, "learning_rate": 5.4528565543645965e-06, "loss": 2.7144947052001953, "step": 90130 }, { "epoch": 0.7276220304642284, "grad_norm": 1.2446835041046143, "learning_rate": 5.451241084626383e-06, "loss": 2.354953956604004, "step": 90140 }, { "epoch": 0.7277027517899954, "grad_norm": 1.5490741729736328, "learning_rate": 5.44962561488817e-06, "loss": 2.600625419616699, "step": 90150 }, { "epoch": 0.7277834731157624, "grad_norm": 1.1756658554077148, "learning_rate": 5.448010145149957e-06, "loss": 2.3116167068481444, "step": 90160 }, { "epoch": 0.7278641944415295, "grad_norm": 0.7124348878860474, "learning_rate": 5.446394675411744e-06, "loss": 2.6238609313964845, "step": 90170 }, { "epoch": 0.7279449157672966, "grad_norm": 1.4468635320663452, "learning_rate": 5.444779205673531e-06, "loss": 2.852885055541992, "step": 90180 }, { "epoch": 0.7280256370930637, "grad_norm": 0.8928678631782532, "learning_rate": 5.443163735935318e-06, "loss": 2.9098281860351562, "step": 90190 }, { "epoch": 0.7281063584188306, "grad_norm": 0.9417805671691895, "learning_rate": 5.4415482661971044e-06, "loss": 2.240447998046875, "step": 90200 }, { "epoch": 0.7281870797445977, "grad_norm": 0.743887186050415, "learning_rate": 5.439932796458891e-06, "loss": 2.3994340896606445, "step": 90210 }, { "epoch": 0.7282678010703648, "grad_norm": 1.0420340299606323, "learning_rate": 5.438317326720678e-06, "loss": 2.470026397705078, "step": 90220 }, { "epoch": 0.7283485223961318, "grad_norm": 0.9677048325538635, "learning_rate": 5.436701856982465e-06, "loss": 2.638640594482422, "step": 90230 }, { "epoch": 0.7284292437218989, "grad_norm": 1.1518038511276245, "learning_rate": 5.435086387244252e-06, "loss": 2.6648229598999023, "step": 90240 }, { "epoch": 0.728509965047666, "grad_norm": 0.9051275849342346, "learning_rate": 5.433470917506039e-06, "loss": 2.491387367248535, "step": 90250 }, { "epoch": 0.728590686373433, "grad_norm": 0.5987113118171692, "learning_rate": 5.4318554477678255e-06, "loss": 2.696630859375, "step": 90260 }, { "epoch": 0.7286714076992, "grad_norm": 1.303680181503296, "learning_rate": 5.430239978029612e-06, "loss": 3.10074520111084, "step": 90270 }, { "epoch": 0.7287521290249671, "grad_norm": 1.4919004440307617, "learning_rate": 5.428624508291399e-06, "loss": 2.518410491943359, "step": 90280 }, { "epoch": 0.7288328503507342, "grad_norm": 0.8240571022033691, "learning_rate": 5.427009038553186e-06, "loss": 2.6451114654541015, "step": 90290 }, { "epoch": 0.7289135716765012, "grad_norm": 1.0385358333587646, "learning_rate": 5.425393568814973e-06, "loss": 2.39713077545166, "step": 90300 }, { "epoch": 0.7289942930022683, "grad_norm": 1.0072643756866455, "learning_rate": 5.42377809907676e-06, "loss": 2.615951347351074, "step": 90310 }, { "epoch": 0.7290750143280353, "grad_norm": 0.7153621315956116, "learning_rate": 5.422162629338547e-06, "loss": 3.1575687408447264, "step": 90320 }, { "epoch": 0.7291557356538024, "grad_norm": 0.7221550941467285, "learning_rate": 5.4205471596003334e-06, "loss": 2.7559535980224608, "step": 90330 }, { "epoch": 0.7292364569795694, "grad_norm": 1.0427998304367065, "learning_rate": 5.41893168986212e-06, "loss": 2.8205394744873047, "step": 90340 }, { "epoch": 0.7293171783053365, "grad_norm": 0.8316099643707275, "learning_rate": 5.417316220123907e-06, "loss": 2.686841583251953, "step": 90350 }, { "epoch": 0.7293978996311036, "grad_norm": 1.8779075145721436, "learning_rate": 5.415700750385694e-06, "loss": 2.644405746459961, "step": 90360 }, { "epoch": 0.7294786209568705, "grad_norm": 0.9448473453521729, "learning_rate": 5.414085280647481e-06, "loss": 2.6507320404052734, "step": 90370 }, { "epoch": 0.7295593422826376, "grad_norm": 0.6682572960853577, "learning_rate": 5.412469810909268e-06, "loss": 2.7625694274902344, "step": 90380 }, { "epoch": 0.7296400636084047, "grad_norm": 1.5345613956451416, "learning_rate": 5.4108543411710545e-06, "loss": 2.4939029693603514, "step": 90390 }, { "epoch": 0.7297207849341718, "grad_norm": 0.8864123225212097, "learning_rate": 5.409238871432841e-06, "loss": 2.769633483886719, "step": 90400 }, { "epoch": 0.7298015062599388, "grad_norm": 0.8166956305503845, "learning_rate": 5.407623401694628e-06, "loss": 2.5356189727783205, "step": 90410 }, { "epoch": 0.7298822275857059, "grad_norm": 0.8404062390327454, "learning_rate": 5.406007931956415e-06, "loss": 2.1833288192749025, "step": 90420 }, { "epoch": 0.729962948911473, "grad_norm": 0.7357181906700134, "learning_rate": 5.404392462218202e-06, "loss": 2.4904787063598635, "step": 90430 }, { "epoch": 0.7300436702372399, "grad_norm": 1.4917948246002197, "learning_rate": 5.402776992479989e-06, "loss": 2.6567663192749023, "step": 90440 }, { "epoch": 0.730124391563007, "grad_norm": 0.8312957882881165, "learning_rate": 5.401161522741776e-06, "loss": 2.634901237487793, "step": 90450 }, { "epoch": 0.7302051128887741, "grad_norm": 0.9327229261398315, "learning_rate": 5.3995460530035624e-06, "loss": 2.289403533935547, "step": 90460 }, { "epoch": 0.7302858342145412, "grad_norm": 1.5356985330581665, "learning_rate": 5.397930583265349e-06, "loss": 2.5876081466674803, "step": 90470 }, { "epoch": 0.7303665555403082, "grad_norm": 1.0539617538452148, "learning_rate": 5.396315113527136e-06, "loss": 2.4941408157348635, "step": 90480 }, { "epoch": 0.7304472768660752, "grad_norm": 1.0877678394317627, "learning_rate": 5.394699643788923e-06, "loss": 2.6870754241943358, "step": 90490 }, { "epoch": 0.7305279981918423, "grad_norm": 1.3099557161331177, "learning_rate": 5.39308417405071e-06, "loss": 2.848007392883301, "step": 90500 }, { "epoch": 0.7306087195176093, "grad_norm": 0.7919490337371826, "learning_rate": 5.391468704312497e-06, "loss": 3.151546669006348, "step": 90510 }, { "epoch": 0.7306894408433764, "grad_norm": 0.9940291047096252, "learning_rate": 5.3898532345742835e-06, "loss": 2.571136474609375, "step": 90520 }, { "epoch": 0.7307701621691435, "grad_norm": 0.9525814652442932, "learning_rate": 5.38823776483607e-06, "loss": 2.4455886840820313, "step": 90530 }, { "epoch": 0.7308508834949106, "grad_norm": 1.073375940322876, "learning_rate": 5.386622295097857e-06, "loss": 2.5242101669311525, "step": 90540 }, { "epoch": 0.7309316048206775, "grad_norm": 1.2982922792434692, "learning_rate": 5.385006825359644e-06, "loss": 2.6010854721069334, "step": 90550 }, { "epoch": 0.7310123261464446, "grad_norm": 0.8342114686965942, "learning_rate": 5.383391355621431e-06, "loss": 2.7265716552734376, "step": 90560 }, { "epoch": 0.7310930474722117, "grad_norm": 0.6368288993835449, "learning_rate": 5.381775885883218e-06, "loss": 2.6307058334350586, "step": 90570 }, { "epoch": 0.7311737687979787, "grad_norm": 0.929897129535675, "learning_rate": 5.3801604161450046e-06, "loss": 2.631728935241699, "step": 90580 }, { "epoch": 0.7312544901237458, "grad_norm": 1.0264214277267456, "learning_rate": 5.378544946406791e-06, "loss": 2.7614501953125, "step": 90590 }, { "epoch": 0.7313352114495129, "grad_norm": 0.6417721509933472, "learning_rate": 5.376929476668578e-06, "loss": 2.8818359375, "step": 90600 }, { "epoch": 0.7314159327752799, "grad_norm": 0.717307448387146, "learning_rate": 5.375314006930366e-06, "loss": 2.873972511291504, "step": 90610 }, { "epoch": 0.7314966541010469, "grad_norm": 1.5877546072006226, "learning_rate": 5.373698537192153e-06, "loss": 2.7058042526245116, "step": 90620 }, { "epoch": 0.731577375426814, "grad_norm": 1.0653115510940552, "learning_rate": 5.37208306745394e-06, "loss": 2.901655578613281, "step": 90630 }, { "epoch": 0.7316580967525811, "grad_norm": 0.5879470705986023, "learning_rate": 5.3704675977157265e-06, "loss": 2.486281394958496, "step": 90640 }, { "epoch": 0.7317388180783481, "grad_norm": 0.9184538125991821, "learning_rate": 5.368852127977513e-06, "loss": 3.019119644165039, "step": 90650 }, { "epoch": 0.7318195394041151, "grad_norm": 0.7624034285545349, "learning_rate": 5.3672366582393e-06, "loss": 2.884370040893555, "step": 90660 }, { "epoch": 0.7319002607298822, "grad_norm": 1.3121898174285889, "learning_rate": 5.365621188501087e-06, "loss": 3.467538833618164, "step": 90670 }, { "epoch": 0.7319809820556493, "grad_norm": 0.8838943839073181, "learning_rate": 5.364005718762874e-06, "loss": 2.670486831665039, "step": 90680 }, { "epoch": 0.7320617033814163, "grad_norm": 1.3175849914550781, "learning_rate": 5.362390249024661e-06, "loss": 2.7080415725708007, "step": 90690 }, { "epoch": 0.7321424247071834, "grad_norm": 0.9991394877433777, "learning_rate": 5.3607747792864476e-06, "loss": 2.690322685241699, "step": 90700 }, { "epoch": 0.7322231460329505, "grad_norm": 0.9578015804290771, "learning_rate": 5.359159309548234e-06, "loss": 2.523649978637695, "step": 90710 }, { "epoch": 0.7323038673587176, "grad_norm": 1.7477692365646362, "learning_rate": 5.357543839810021e-06, "loss": 2.655710983276367, "step": 90720 }, { "epoch": 0.7323845886844845, "grad_norm": 0.7606785297393799, "learning_rate": 5.355928370071808e-06, "loss": 2.872998809814453, "step": 90730 }, { "epoch": 0.7324653100102516, "grad_norm": 1.2130051851272583, "learning_rate": 5.354312900333595e-06, "loss": 2.646321487426758, "step": 90740 }, { "epoch": 0.7325460313360187, "grad_norm": 0.954711377620697, "learning_rate": 5.352697430595382e-06, "loss": 2.6836801528930665, "step": 90750 }, { "epoch": 0.7326267526617857, "grad_norm": 0.8930370211601257, "learning_rate": 5.351081960857169e-06, "loss": 2.170515441894531, "step": 90760 }, { "epoch": 0.7327074739875528, "grad_norm": 0.8318340182304382, "learning_rate": 5.3494664911189555e-06, "loss": 2.4810272216796876, "step": 90770 }, { "epoch": 0.7327881953133198, "grad_norm": 0.8560299277305603, "learning_rate": 5.347851021380742e-06, "loss": 2.692620849609375, "step": 90780 }, { "epoch": 0.7328689166390869, "grad_norm": 0.8867313265800476, "learning_rate": 5.346235551642529e-06, "loss": 2.718998336791992, "step": 90790 }, { "epoch": 0.7329496379648539, "grad_norm": 1.5585906505584717, "learning_rate": 5.344620081904316e-06, "loss": 2.404843330383301, "step": 90800 }, { "epoch": 0.733030359290621, "grad_norm": 0.8496745228767395, "learning_rate": 5.343004612166103e-06, "loss": 2.610490417480469, "step": 90810 }, { "epoch": 0.7331110806163881, "grad_norm": 0.7771853804588318, "learning_rate": 5.34138914242789e-06, "loss": 2.8435768127441405, "step": 90820 }, { "epoch": 0.733191801942155, "grad_norm": 1.0534623861312866, "learning_rate": 5.3397736726896766e-06, "loss": 3.0806644439697264, "step": 90830 }, { "epoch": 0.7332725232679221, "grad_norm": 0.8708305358886719, "learning_rate": 5.338158202951463e-06, "loss": 2.572992706298828, "step": 90840 }, { "epoch": 0.7333532445936892, "grad_norm": 1.0233815908432007, "learning_rate": 5.33654273321325e-06, "loss": 2.6184152603149413, "step": 90850 }, { "epoch": 0.7334339659194563, "grad_norm": 1.051867127418518, "learning_rate": 5.334927263475037e-06, "loss": 2.394352340698242, "step": 90860 }, { "epoch": 0.7335146872452233, "grad_norm": 0.9255433082580566, "learning_rate": 5.333311793736824e-06, "loss": 2.40682373046875, "step": 90870 }, { "epoch": 0.7335954085709904, "grad_norm": 1.4665946960449219, "learning_rate": 5.331696323998611e-06, "loss": 2.948231315612793, "step": 90880 }, { "epoch": 0.7336761298967575, "grad_norm": 0.6893442869186401, "learning_rate": 5.330080854260398e-06, "loss": 2.3054113388061523, "step": 90890 }, { "epoch": 0.7337568512225244, "grad_norm": 1.186362385749817, "learning_rate": 5.3284653845221845e-06, "loss": 2.585656929016113, "step": 90900 }, { "epoch": 0.7338375725482915, "grad_norm": 1.5653297901153564, "learning_rate": 5.326849914783971e-06, "loss": 2.5334630966186524, "step": 90910 }, { "epoch": 0.7339182938740586, "grad_norm": 1.0123008489608765, "learning_rate": 5.325234445045758e-06, "loss": 3.0250457763671874, "step": 90920 }, { "epoch": 0.7339990151998257, "grad_norm": 1.0899122953414917, "learning_rate": 5.323618975307545e-06, "loss": 2.775806999206543, "step": 90930 }, { "epoch": 0.7340797365255927, "grad_norm": 0.7698469161987305, "learning_rate": 5.322003505569332e-06, "loss": 3.0931980133056642, "step": 90940 }, { "epoch": 0.7341604578513597, "grad_norm": 1.0032774209976196, "learning_rate": 5.320388035831119e-06, "loss": 2.8013050079345705, "step": 90950 }, { "epoch": 0.7342411791771268, "grad_norm": 1.4404107332229614, "learning_rate": 5.3187725660929056e-06, "loss": 2.635783576965332, "step": 90960 }, { "epoch": 0.7343219005028938, "grad_norm": 1.0126926898956299, "learning_rate": 5.317157096354692e-06, "loss": 2.3505348205566405, "step": 90970 }, { "epoch": 0.7344026218286609, "grad_norm": 0.8469247221946716, "learning_rate": 5.315541626616479e-06, "loss": 2.1941728591918945, "step": 90980 }, { "epoch": 0.734483343154428, "grad_norm": 0.9119474291801453, "learning_rate": 5.313926156878266e-06, "loss": 2.43990478515625, "step": 90990 }, { "epoch": 0.7345640644801951, "grad_norm": 1.0558414459228516, "learning_rate": 5.312310687140053e-06, "loss": 2.3991683959960937, "step": 91000 }, { "epoch": 0.734644785805962, "grad_norm": 1.6292182207107544, "learning_rate": 5.31069521740184e-06, "loss": 2.5569305419921875, "step": 91010 }, { "epoch": 0.7347255071317291, "grad_norm": 0.8933079838752747, "learning_rate": 5.309079747663627e-06, "loss": 2.6208690643310546, "step": 91020 }, { "epoch": 0.7348062284574962, "grad_norm": 0.7872136235237122, "learning_rate": 5.3074642779254135e-06, "loss": 2.1457515716552735, "step": 91030 }, { "epoch": 0.7348869497832632, "grad_norm": 0.6412976980209351, "learning_rate": 5.3058488081872e-06, "loss": 2.580753517150879, "step": 91040 }, { "epoch": 0.7349676711090303, "grad_norm": 2.113030433654785, "learning_rate": 5.304233338448987e-06, "loss": 3.2592281341552733, "step": 91050 }, { "epoch": 0.7350483924347974, "grad_norm": 0.8897603750228882, "learning_rate": 5.302617868710774e-06, "loss": 2.577708625793457, "step": 91060 }, { "epoch": 0.7351291137605644, "grad_norm": 1.5103404521942139, "learning_rate": 5.301002398972561e-06, "loss": 2.5507785797119142, "step": 91070 }, { "epoch": 0.7352098350863314, "grad_norm": 0.625939130783081, "learning_rate": 5.299386929234348e-06, "loss": 2.6486053466796875, "step": 91080 }, { "epoch": 0.7352905564120985, "grad_norm": 0.7257287502288818, "learning_rate": 5.2977714594961345e-06, "loss": 2.46872615814209, "step": 91090 }, { "epoch": 0.7353712777378656, "grad_norm": 0.6284798979759216, "learning_rate": 5.296155989757921e-06, "loss": 2.534379768371582, "step": 91100 }, { "epoch": 0.7354519990636326, "grad_norm": 0.750719428062439, "learning_rate": 5.294540520019708e-06, "loss": 2.4122909545898437, "step": 91110 }, { "epoch": 0.7355327203893997, "grad_norm": 0.589546799659729, "learning_rate": 5.292925050281495e-06, "loss": 2.798995780944824, "step": 91120 }, { "epoch": 0.7356134417151667, "grad_norm": 0.706667423248291, "learning_rate": 5.291309580543283e-06, "loss": 2.670477294921875, "step": 91130 }, { "epoch": 0.7356941630409338, "grad_norm": 1.298819899559021, "learning_rate": 5.28969411080507e-06, "loss": 2.7087610244750975, "step": 91140 }, { "epoch": 0.7357748843667008, "grad_norm": 0.6461561322212219, "learning_rate": 5.288078641066857e-06, "loss": 2.538334274291992, "step": 91150 }, { "epoch": 0.7358556056924679, "grad_norm": 0.6943392753601074, "learning_rate": 5.286463171328644e-06, "loss": 2.77877140045166, "step": 91160 }, { "epoch": 0.735936327018235, "grad_norm": 1.1960786581039429, "learning_rate": 5.284847701590431e-06, "loss": 2.599087142944336, "step": 91170 }, { "epoch": 0.7360170483440021, "grad_norm": 0.7138100862503052, "learning_rate": 5.283232231852218e-06, "loss": 2.658548355102539, "step": 91180 }, { "epoch": 0.736097769669769, "grad_norm": 1.2947739362716675, "learning_rate": 5.281616762114005e-06, "loss": 2.949885940551758, "step": 91190 }, { "epoch": 0.7361784909955361, "grad_norm": 1.2825907468795776, "learning_rate": 5.2800012923757915e-06, "loss": 2.173225212097168, "step": 91200 }, { "epoch": 0.7362592123213032, "grad_norm": 1.842978596687317, "learning_rate": 5.278385822637578e-06, "loss": 2.9267084121704103, "step": 91210 }, { "epoch": 0.7363399336470702, "grad_norm": 0.5957543253898621, "learning_rate": 5.276770352899365e-06, "loss": 2.330329704284668, "step": 91220 }, { "epoch": 0.7364206549728373, "grad_norm": 0.9351698160171509, "learning_rate": 5.275154883161152e-06, "loss": 2.4390996932983398, "step": 91230 }, { "epoch": 0.7365013762986043, "grad_norm": 1.0608006715774536, "learning_rate": 5.273539413422939e-06, "loss": 2.3107702255249025, "step": 91240 }, { "epoch": 0.7365820976243714, "grad_norm": 0.9495835304260254, "learning_rate": 5.271923943684726e-06, "loss": 2.7518983840942384, "step": 91250 }, { "epoch": 0.7366628189501384, "grad_norm": 1.5411001443862915, "learning_rate": 5.270308473946513e-06, "loss": 2.9422252655029295, "step": 91260 }, { "epoch": 0.7367435402759055, "grad_norm": 1.647692322731018, "learning_rate": 5.2686930042082995e-06, "loss": 2.7070154190063476, "step": 91270 }, { "epoch": 0.7368242616016726, "grad_norm": 0.984099805355072, "learning_rate": 5.267077534470086e-06, "loss": 2.4629751205444337, "step": 91280 }, { "epoch": 0.7369049829274396, "grad_norm": 0.8112901449203491, "learning_rate": 5.265462064731873e-06, "loss": 2.6450286865234376, "step": 91290 }, { "epoch": 0.7369857042532066, "grad_norm": 1.6675472259521484, "learning_rate": 5.26384659499366e-06, "loss": 2.705936813354492, "step": 91300 }, { "epoch": 0.7370664255789737, "grad_norm": 1.509422779083252, "learning_rate": 5.262231125255447e-06, "loss": 2.373984146118164, "step": 91310 }, { "epoch": 0.7371471469047408, "grad_norm": 0.7273574471473694, "learning_rate": 5.260615655517234e-06, "loss": 2.5228723526000976, "step": 91320 }, { "epoch": 0.7372278682305078, "grad_norm": 0.6616495847702026, "learning_rate": 5.2590001857790205e-06, "loss": 2.937107467651367, "step": 91330 }, { "epoch": 0.7373085895562749, "grad_norm": 0.6227743625640869, "learning_rate": 5.257384716040807e-06, "loss": 3.2480369567871095, "step": 91340 }, { "epoch": 0.737389310882042, "grad_norm": 0.5886894464492798, "learning_rate": 5.255769246302594e-06, "loss": 2.762432098388672, "step": 91350 }, { "epoch": 0.7374700322078089, "grad_norm": 0.844577431678772, "learning_rate": 5.254153776564381e-06, "loss": 2.3078807830810546, "step": 91360 }, { "epoch": 0.737550753533576, "grad_norm": 1.2098504304885864, "learning_rate": 5.252538306826168e-06, "loss": 2.6730300903320314, "step": 91370 }, { "epoch": 0.7376314748593431, "grad_norm": 0.7790641188621521, "learning_rate": 5.250922837087955e-06, "loss": 2.298080253601074, "step": 91380 }, { "epoch": 0.7377121961851102, "grad_norm": 1.7627519369125366, "learning_rate": 5.249307367349742e-06, "loss": 2.5454475402832033, "step": 91390 }, { "epoch": 0.7377929175108772, "grad_norm": 1.3225946426391602, "learning_rate": 5.2476918976115285e-06, "loss": 2.667460060119629, "step": 91400 }, { "epoch": 0.7378736388366443, "grad_norm": 1.1785709857940674, "learning_rate": 5.246076427873315e-06, "loss": 2.863239860534668, "step": 91410 }, { "epoch": 0.7379543601624113, "grad_norm": 0.8573727011680603, "learning_rate": 5.244460958135102e-06, "loss": 2.4064287185668944, "step": 91420 }, { "epoch": 0.7380350814881783, "grad_norm": 0.7023150324821472, "learning_rate": 5.242845488396889e-06, "loss": 2.8358230590820312, "step": 91430 }, { "epoch": 0.7381158028139454, "grad_norm": 1.3451114892959595, "learning_rate": 5.241230018658676e-06, "loss": 2.6377758026123046, "step": 91440 }, { "epoch": 0.7381965241397125, "grad_norm": 1.0805511474609375, "learning_rate": 5.239614548920463e-06, "loss": 2.597267913818359, "step": 91450 }, { "epoch": 0.7382772454654796, "grad_norm": 0.7693408727645874, "learning_rate": 5.2379990791822495e-06, "loss": 3.1172889709472655, "step": 91460 }, { "epoch": 0.7383579667912465, "grad_norm": 0.8100792169570923, "learning_rate": 5.236383609444036e-06, "loss": 2.9072711944580076, "step": 91470 }, { "epoch": 0.7384386881170136, "grad_norm": 0.7373296618461609, "learning_rate": 5.234768139705823e-06, "loss": 2.3912689208984377, "step": 91480 }, { "epoch": 0.7385194094427807, "grad_norm": 1.0689672231674194, "learning_rate": 5.233152669967611e-06, "loss": 2.6681480407714844, "step": 91490 }, { "epoch": 0.7386001307685477, "grad_norm": 0.6370298266410828, "learning_rate": 5.231537200229398e-06, "loss": 2.946835517883301, "step": 91500 }, { "epoch": 0.7386808520943148, "grad_norm": 1.1274696588516235, "learning_rate": 5.229921730491185e-06, "loss": 2.2700569152832033, "step": 91510 }, { "epoch": 0.7387615734200819, "grad_norm": 1.1095720529556274, "learning_rate": 5.2283062607529715e-06, "loss": 2.327073669433594, "step": 91520 }, { "epoch": 0.738842294745849, "grad_norm": 1.2004669904708862, "learning_rate": 5.226690791014758e-06, "loss": 2.7158946990966797, "step": 91530 }, { "epoch": 0.7389230160716159, "grad_norm": 0.9296546578407288, "learning_rate": 5.225075321276545e-06, "loss": 3.1517019271850586, "step": 91540 }, { "epoch": 0.739003737397383, "grad_norm": 1.070422649383545, "learning_rate": 5.223459851538332e-06, "loss": 2.4013799667358398, "step": 91550 }, { "epoch": 0.7390844587231501, "grad_norm": 1.100486397743225, "learning_rate": 5.221844381800119e-06, "loss": 2.7353872299194335, "step": 91560 }, { "epoch": 0.7391651800489171, "grad_norm": 1.0834381580352783, "learning_rate": 5.220228912061906e-06, "loss": 2.461924743652344, "step": 91570 }, { "epoch": 0.7392459013746842, "grad_norm": 1.4510395526885986, "learning_rate": 5.2186134423236925e-06, "loss": 2.6872230529785157, "step": 91580 }, { "epoch": 0.7393266227004512, "grad_norm": 1.55745267868042, "learning_rate": 5.216997972585479e-06, "loss": 2.8454946517944335, "step": 91590 }, { "epoch": 0.7394073440262183, "grad_norm": 0.8181082010269165, "learning_rate": 5.215382502847266e-06, "loss": 2.2739583969116213, "step": 91600 }, { "epoch": 0.7394880653519853, "grad_norm": 0.938973069190979, "learning_rate": 5.213767033109053e-06, "loss": 2.224650192260742, "step": 91610 }, { "epoch": 0.7395687866777524, "grad_norm": 1.057045340538025, "learning_rate": 5.21215156337084e-06, "loss": 2.581742858886719, "step": 91620 }, { "epoch": 0.7396495080035195, "grad_norm": 1.755509853363037, "learning_rate": 5.210536093632627e-06, "loss": 2.689754867553711, "step": 91630 }, { "epoch": 0.7397302293292866, "grad_norm": 0.9201801419258118, "learning_rate": 5.208920623894414e-06, "loss": 2.7813310623168945, "step": 91640 }, { "epoch": 0.7398109506550535, "grad_norm": 1.138811469078064, "learning_rate": 5.2073051541562004e-06, "loss": 2.4343433380126953, "step": 91650 }, { "epoch": 0.7398916719808206, "grad_norm": 1.0821789503097534, "learning_rate": 5.205689684417987e-06, "loss": 2.5652631759643554, "step": 91660 }, { "epoch": 0.7399723933065877, "grad_norm": 1.3362175226211548, "learning_rate": 5.204074214679774e-06, "loss": 2.358882713317871, "step": 91670 }, { "epoch": 0.7400531146323547, "grad_norm": 0.8242215514183044, "learning_rate": 5.202458744941561e-06, "loss": 2.8487205505371094, "step": 91680 }, { "epoch": 0.7401338359581218, "grad_norm": 0.6442916393280029, "learning_rate": 5.200843275203348e-06, "loss": 2.7357187271118164, "step": 91690 }, { "epoch": 0.7402145572838889, "grad_norm": 1.217238426208496, "learning_rate": 5.199227805465135e-06, "loss": 2.5650938034057615, "step": 91700 }, { "epoch": 0.7402952786096559, "grad_norm": 1.0219597816467285, "learning_rate": 5.1976123357269215e-06, "loss": 2.526209831237793, "step": 91710 }, { "epoch": 0.7403759999354229, "grad_norm": 1.2465766668319702, "learning_rate": 5.195996865988708e-06, "loss": 2.5636878967285157, "step": 91720 }, { "epoch": 0.74045672126119, "grad_norm": 0.6657280325889587, "learning_rate": 5.194381396250495e-06, "loss": 2.8620458602905274, "step": 91730 }, { "epoch": 0.7405374425869571, "grad_norm": 0.6303619146347046, "learning_rate": 5.192765926512282e-06, "loss": 2.5967535018920898, "step": 91740 }, { "epoch": 0.7406181639127241, "grad_norm": 1.0904659032821655, "learning_rate": 5.191150456774069e-06, "loss": 2.434335708618164, "step": 91750 }, { "epoch": 0.7406988852384911, "grad_norm": 1.1301217079162598, "learning_rate": 5.189534987035856e-06, "loss": 2.7913702011108397, "step": 91760 }, { "epoch": 0.7407796065642582, "grad_norm": 0.8443275690078735, "learning_rate": 5.187919517297643e-06, "loss": 2.647247314453125, "step": 91770 }, { "epoch": 0.7408603278900253, "grad_norm": 1.167618751525879, "learning_rate": 5.1863040475594294e-06, "loss": 2.6579479217529296, "step": 91780 }, { "epoch": 0.7409410492157923, "grad_norm": 1.502402901649475, "learning_rate": 5.184688577821216e-06, "loss": 2.8550142288208007, "step": 91790 }, { "epoch": 0.7410217705415594, "grad_norm": 0.8483044505119324, "learning_rate": 5.183073108083003e-06, "loss": 2.5069637298583984, "step": 91800 }, { "epoch": 0.7411024918673265, "grad_norm": 0.8792640566825867, "learning_rate": 5.18145763834479e-06, "loss": 2.5109041213989256, "step": 91810 }, { "epoch": 0.7411832131930934, "grad_norm": 0.7976579666137695, "learning_rate": 5.179842168606577e-06, "loss": 2.4416782379150392, "step": 91820 }, { "epoch": 0.7412639345188605, "grad_norm": 0.8495512008666992, "learning_rate": 5.178226698868364e-06, "loss": 2.6321762084960936, "step": 91830 }, { "epoch": 0.7413446558446276, "grad_norm": 0.5946964621543884, "learning_rate": 5.1766112291301505e-06, "loss": 2.410526466369629, "step": 91840 }, { "epoch": 0.7414253771703947, "grad_norm": 0.8570066094398499, "learning_rate": 5.174995759391937e-06, "loss": 2.5598512649536134, "step": 91850 }, { "epoch": 0.7415060984961617, "grad_norm": 0.8543986678123474, "learning_rate": 5.173380289653724e-06, "loss": 2.938564682006836, "step": 91860 }, { "epoch": 0.7415868198219288, "grad_norm": 1.3952564001083374, "learning_rate": 5.171764819915511e-06, "loss": 2.3067623138427735, "step": 91870 }, { "epoch": 0.7416675411476958, "grad_norm": 1.1283183097839355, "learning_rate": 5.170149350177298e-06, "loss": 3.0546260833740235, "step": 91880 }, { "epoch": 0.7417482624734628, "grad_norm": 0.6301043629646301, "learning_rate": 5.168533880439085e-06, "loss": 2.7426803588867186, "step": 91890 }, { "epoch": 0.7418289837992299, "grad_norm": 0.8836440443992615, "learning_rate": 5.166918410700872e-06, "loss": 2.7642333984375, "step": 91900 }, { "epoch": 0.741909705124997, "grad_norm": 0.9093005657196045, "learning_rate": 5.1653029409626584e-06, "loss": 2.854613494873047, "step": 91910 }, { "epoch": 0.7419904264507641, "grad_norm": 1.4696354866027832, "learning_rate": 5.163687471224445e-06, "loss": 2.344512367248535, "step": 91920 }, { "epoch": 0.742071147776531, "grad_norm": 0.5712679624557495, "learning_rate": 5.162072001486232e-06, "loss": 2.846391296386719, "step": 91930 }, { "epoch": 0.7421518691022981, "grad_norm": 0.9972032904624939, "learning_rate": 5.160456531748019e-06, "loss": 2.4886566162109376, "step": 91940 }, { "epoch": 0.7422325904280652, "grad_norm": 1.3238288164138794, "learning_rate": 5.158841062009806e-06, "loss": 2.667796325683594, "step": 91950 }, { "epoch": 0.7423133117538322, "grad_norm": 1.649124264717102, "learning_rate": 5.157225592271593e-06, "loss": 2.493358612060547, "step": 91960 }, { "epoch": 0.7423940330795993, "grad_norm": 1.7090582847595215, "learning_rate": 5.1556101225333795e-06, "loss": 2.9191072463989256, "step": 91970 }, { "epoch": 0.7424747544053664, "grad_norm": 0.88961261510849, "learning_rate": 5.153994652795166e-06, "loss": 2.336140251159668, "step": 91980 }, { "epoch": 0.7425554757311335, "grad_norm": 0.6875024437904358, "learning_rate": 5.152379183056953e-06, "loss": 2.2763202667236326, "step": 91990 }, { "epoch": 0.7426361970569004, "grad_norm": 1.3887799978256226, "learning_rate": 5.150763713318741e-06, "loss": 2.7243303298950194, "step": 92000 }, { "epoch": 0.7427169183826675, "grad_norm": 0.9964314103126526, "learning_rate": 5.149148243580528e-06, "loss": 2.761570930480957, "step": 92010 }, { "epoch": 0.7427976397084346, "grad_norm": 1.130307674407959, "learning_rate": 5.147532773842315e-06, "loss": 2.7188388824462892, "step": 92020 }, { "epoch": 0.7428783610342016, "grad_norm": 0.9226730465888977, "learning_rate": 5.1459173041041014e-06, "loss": 2.396516799926758, "step": 92030 }, { "epoch": 0.7429590823599687, "grad_norm": 1.398897647857666, "learning_rate": 5.144301834365888e-06, "loss": 2.6451568603515625, "step": 92040 }, { "epoch": 0.7430398036857357, "grad_norm": 0.6499971747398376, "learning_rate": 5.142686364627675e-06, "loss": 2.402614974975586, "step": 92050 }, { "epoch": 0.7431205250115028, "grad_norm": 0.7707343697547913, "learning_rate": 5.141070894889462e-06, "loss": 2.6430925369262694, "step": 92060 }, { "epoch": 0.7432012463372698, "grad_norm": 1.0522818565368652, "learning_rate": 5.139455425151249e-06, "loss": 2.232292366027832, "step": 92070 }, { "epoch": 0.7432819676630369, "grad_norm": 0.76469486951828, "learning_rate": 5.137839955413036e-06, "loss": 2.685994529724121, "step": 92080 }, { "epoch": 0.743362688988804, "grad_norm": 0.6708725690841675, "learning_rate": 5.1362244856748225e-06, "loss": 2.519037437438965, "step": 92090 }, { "epoch": 0.743443410314571, "grad_norm": 1.0556591749191284, "learning_rate": 5.134609015936609e-06, "loss": 2.594565773010254, "step": 92100 }, { "epoch": 0.743524131640338, "grad_norm": 1.0949827432632446, "learning_rate": 5.132993546198396e-06, "loss": 2.593456268310547, "step": 92110 }, { "epoch": 0.7436048529661051, "grad_norm": 0.6773986220359802, "learning_rate": 5.131378076460183e-06, "loss": 2.7391120910644533, "step": 92120 }, { "epoch": 0.7436855742918722, "grad_norm": 0.7629951238632202, "learning_rate": 5.12976260672197e-06, "loss": 2.6590572357177735, "step": 92130 }, { "epoch": 0.7437662956176392, "grad_norm": 1.0558890104293823, "learning_rate": 5.128147136983757e-06, "loss": 2.4375518798828124, "step": 92140 }, { "epoch": 0.7438470169434063, "grad_norm": 0.7468170523643494, "learning_rate": 5.126531667245544e-06, "loss": 2.2667266845703127, "step": 92150 }, { "epoch": 0.7439277382691734, "grad_norm": 1.0298691987991333, "learning_rate": 5.1249161975073304e-06, "loss": 2.402995300292969, "step": 92160 }, { "epoch": 0.7440084595949404, "grad_norm": 1.2708351612091064, "learning_rate": 5.123300727769117e-06, "loss": 3.0268383026123047, "step": 92170 }, { "epoch": 0.7440891809207074, "grad_norm": 0.9253402948379517, "learning_rate": 5.121685258030904e-06, "loss": 2.8821020126342773, "step": 92180 }, { "epoch": 0.7441699022464745, "grad_norm": 0.7027146816253662, "learning_rate": 5.120069788292691e-06, "loss": 2.965077781677246, "step": 92190 }, { "epoch": 0.7442506235722416, "grad_norm": 1.0998269319534302, "learning_rate": 5.118454318554478e-06, "loss": 2.3402732849121093, "step": 92200 }, { "epoch": 0.7443313448980086, "grad_norm": 0.825408935546875, "learning_rate": 5.116838848816265e-06, "loss": 2.678345489501953, "step": 92210 }, { "epoch": 0.7444120662237756, "grad_norm": 1.6907236576080322, "learning_rate": 5.1152233790780515e-06, "loss": 2.504751968383789, "step": 92220 }, { "epoch": 0.7444927875495427, "grad_norm": 2.053203821182251, "learning_rate": 5.113607909339838e-06, "loss": 3.2069427490234377, "step": 92230 }, { "epoch": 0.7445735088753098, "grad_norm": 0.9071234464645386, "learning_rate": 5.111992439601625e-06, "loss": 2.653101348876953, "step": 92240 }, { "epoch": 0.7446542302010768, "grad_norm": 0.6706135272979736, "learning_rate": 5.110376969863412e-06, "loss": 2.6405967712402343, "step": 92250 }, { "epoch": 0.7447349515268439, "grad_norm": 0.6988325715065002, "learning_rate": 5.108761500125199e-06, "loss": 2.8338417053222655, "step": 92260 }, { "epoch": 0.744815672852611, "grad_norm": 0.6767387986183167, "learning_rate": 5.107146030386986e-06, "loss": 2.8979160308837892, "step": 92270 }, { "epoch": 0.7448963941783779, "grad_norm": 1.3916270732879639, "learning_rate": 5.1055305606487726e-06, "loss": 2.741663932800293, "step": 92280 }, { "epoch": 0.744977115504145, "grad_norm": 0.8997920155525208, "learning_rate": 5.103915090910559e-06, "loss": 2.2937749862670898, "step": 92290 }, { "epoch": 0.7450578368299121, "grad_norm": 0.876170814037323, "learning_rate": 5.102299621172346e-06, "loss": 2.513829231262207, "step": 92300 }, { "epoch": 0.7451385581556792, "grad_norm": 0.9793541431427002, "learning_rate": 5.100684151434133e-06, "loss": 2.4622297286987305, "step": 92310 }, { "epoch": 0.7452192794814462, "grad_norm": 0.592788815498352, "learning_rate": 5.09906868169592e-06, "loss": 2.939537239074707, "step": 92320 }, { "epoch": 0.7453000008072133, "grad_norm": 1.0838301181793213, "learning_rate": 5.097453211957707e-06, "loss": 2.1503231048583986, "step": 92330 }, { "epoch": 0.7453807221329803, "grad_norm": 1.149324893951416, "learning_rate": 5.095837742219494e-06, "loss": 2.2256338119506838, "step": 92340 }, { "epoch": 0.7454614434587473, "grad_norm": 1.097118854522705, "learning_rate": 5.0942222724812805e-06, "loss": 2.5110008239746096, "step": 92350 }, { "epoch": 0.7455421647845144, "grad_norm": 0.7591117024421692, "learning_rate": 5.092606802743067e-06, "loss": 2.3349884033203123, "step": 92360 }, { "epoch": 0.7456228861102815, "grad_norm": 0.9791789054870605, "learning_rate": 5.090991333004854e-06, "loss": 2.297021484375, "step": 92370 }, { "epoch": 0.7457036074360486, "grad_norm": 1.2303600311279297, "learning_rate": 5.089375863266641e-06, "loss": 2.4422637939453127, "step": 92380 }, { "epoch": 0.7457843287618156, "grad_norm": 1.2505789995193481, "learning_rate": 5.087760393528428e-06, "loss": 2.572202682495117, "step": 92390 }, { "epoch": 0.7458650500875826, "grad_norm": 1.5662336349487305, "learning_rate": 5.086144923790215e-06, "loss": 2.8461868286132814, "step": 92400 }, { "epoch": 0.7459457714133497, "grad_norm": 0.984096884727478, "learning_rate": 5.084529454052003e-06, "loss": 2.554979705810547, "step": 92410 }, { "epoch": 0.7460264927391167, "grad_norm": 0.6686956882476807, "learning_rate": 5.08291398431379e-06, "loss": 2.6694318771362306, "step": 92420 }, { "epoch": 0.7461072140648838, "grad_norm": 1.1779258251190186, "learning_rate": 5.081298514575577e-06, "loss": 3.2362403869628906, "step": 92430 }, { "epoch": 0.7461879353906509, "grad_norm": 0.9207465648651123, "learning_rate": 5.079683044837364e-06, "loss": 2.69195499420166, "step": 92440 }, { "epoch": 0.746268656716418, "grad_norm": 0.7655641436576843, "learning_rate": 5.078067575099151e-06, "loss": 2.6337516784667967, "step": 92450 }, { "epoch": 0.7463493780421849, "grad_norm": 0.6940430402755737, "learning_rate": 5.0764521053609375e-06, "loss": 2.491115760803223, "step": 92460 }, { "epoch": 0.746430099367952, "grad_norm": 1.0539158582687378, "learning_rate": 5.074836635622724e-06, "loss": 2.754745101928711, "step": 92470 }, { "epoch": 0.7465108206937191, "grad_norm": 1.3055254220962524, "learning_rate": 5.073221165884511e-06, "loss": 2.7789028167724608, "step": 92480 }, { "epoch": 0.7465915420194861, "grad_norm": 1.2844692468643188, "learning_rate": 5.071605696146298e-06, "loss": 2.9611717224121095, "step": 92490 }, { "epoch": 0.7466722633452532, "grad_norm": 1.2721353769302368, "learning_rate": 5.069990226408085e-06, "loss": 3.0450939178466796, "step": 92500 }, { "epoch": 0.7467529846710202, "grad_norm": 1.1401339769363403, "learning_rate": 5.068374756669872e-06, "loss": 2.101360893249512, "step": 92510 }, { "epoch": 0.7468337059967873, "grad_norm": 1.4495340585708618, "learning_rate": 5.0667592869316586e-06, "loss": 2.401835060119629, "step": 92520 }, { "epoch": 0.7469144273225543, "grad_norm": 1.0331677198410034, "learning_rate": 5.065143817193445e-06, "loss": 3.2903060913085938, "step": 92530 }, { "epoch": 0.7469951486483214, "grad_norm": 0.6304751038551331, "learning_rate": 5.063528347455232e-06, "loss": 3.068894386291504, "step": 92540 }, { "epoch": 0.7470758699740885, "grad_norm": 0.7240766882896423, "learning_rate": 5.061912877717019e-06, "loss": 2.5958127975463867, "step": 92550 }, { "epoch": 0.7471565912998555, "grad_norm": 0.9237455725669861, "learning_rate": 5.060297407978806e-06, "loss": 2.659662055969238, "step": 92560 }, { "epoch": 0.7472373126256225, "grad_norm": 0.8079571723937988, "learning_rate": 5.058681938240593e-06, "loss": 2.3168764114379883, "step": 92570 }, { "epoch": 0.7473180339513896, "grad_norm": 1.0930384397506714, "learning_rate": 5.05706646850238e-06, "loss": 2.2623126983642576, "step": 92580 }, { "epoch": 0.7473987552771567, "grad_norm": 1.815364122390747, "learning_rate": 5.0554509987641665e-06, "loss": 2.4260126113891602, "step": 92590 }, { "epoch": 0.7474794766029237, "grad_norm": 1.1303778886795044, "learning_rate": 5.053835529025953e-06, "loss": 2.37183837890625, "step": 92600 }, { "epoch": 0.7475601979286908, "grad_norm": 1.177756667137146, "learning_rate": 5.05222005928774e-06, "loss": 2.6376773834228517, "step": 92610 }, { "epoch": 0.7476409192544579, "grad_norm": 0.5563175082206726, "learning_rate": 5.050604589549527e-06, "loss": 2.688129997253418, "step": 92620 }, { "epoch": 0.7477216405802249, "grad_norm": 0.588830292224884, "learning_rate": 5.048989119811314e-06, "loss": 2.442196846008301, "step": 92630 }, { "epoch": 0.7478023619059919, "grad_norm": 0.9807941913604736, "learning_rate": 5.047373650073101e-06, "loss": 2.7340442657470705, "step": 92640 }, { "epoch": 0.747883083231759, "grad_norm": 0.8127692341804504, "learning_rate": 5.0457581803348876e-06, "loss": 2.582122039794922, "step": 92650 }, { "epoch": 0.7479638045575261, "grad_norm": 0.6417713165283203, "learning_rate": 5.044142710596674e-06, "loss": 2.5363367080688475, "step": 92660 }, { "epoch": 0.7480445258832931, "grad_norm": 1.004504680633545, "learning_rate": 5.042527240858461e-06, "loss": 2.3702665328979493, "step": 92670 }, { "epoch": 0.7481252472090602, "grad_norm": 0.7098262906074524, "learning_rate": 5.040911771120248e-06, "loss": 2.8897937774658202, "step": 92680 }, { "epoch": 0.7482059685348272, "grad_norm": 0.7359583973884583, "learning_rate": 5.039296301382035e-06, "loss": 2.639004135131836, "step": 92690 }, { "epoch": 0.7482866898605943, "grad_norm": 0.6911440491676331, "learning_rate": 5.037680831643822e-06, "loss": 2.4076278686523436, "step": 92700 }, { "epoch": 0.7483674111863613, "grad_norm": 0.7669402360916138, "learning_rate": 5.036065361905609e-06, "loss": 2.707694435119629, "step": 92710 }, { "epoch": 0.7484481325121284, "grad_norm": 0.8795379400253296, "learning_rate": 5.0344498921673955e-06, "loss": 2.657325553894043, "step": 92720 }, { "epoch": 0.7485288538378955, "grad_norm": 1.1447488069534302, "learning_rate": 5.032834422429182e-06, "loss": 2.7054298400878904, "step": 92730 }, { "epoch": 0.7486095751636624, "grad_norm": 1.0601956844329834, "learning_rate": 5.031218952690969e-06, "loss": 2.531599998474121, "step": 92740 }, { "epoch": 0.7486902964894295, "grad_norm": 1.3997650146484375, "learning_rate": 5.029603482952756e-06, "loss": 2.475275421142578, "step": 92750 }, { "epoch": 0.7487710178151966, "grad_norm": 0.8789177536964417, "learning_rate": 5.027988013214543e-06, "loss": 2.621120834350586, "step": 92760 }, { "epoch": 0.7488517391409637, "grad_norm": 0.6730213165283203, "learning_rate": 5.02637254347633e-06, "loss": 3.050014877319336, "step": 92770 }, { "epoch": 0.7489324604667307, "grad_norm": 0.9227690100669861, "learning_rate": 5.0247570737381166e-06, "loss": 2.8700061798095704, "step": 92780 }, { "epoch": 0.7490131817924978, "grad_norm": 1.1095908880233765, "learning_rate": 5.023141603999903e-06, "loss": 2.743519973754883, "step": 92790 }, { "epoch": 0.7490939031182648, "grad_norm": 0.9545638561248779, "learning_rate": 5.02152613426169e-06, "loss": 2.4762493133544923, "step": 92800 }, { "epoch": 0.7491746244440318, "grad_norm": 0.7770814895629883, "learning_rate": 5.019910664523477e-06, "loss": 3.1385065078735352, "step": 92810 }, { "epoch": 0.7492553457697989, "grad_norm": 0.9851968288421631, "learning_rate": 5.018295194785264e-06, "loss": 2.3007144927978516, "step": 92820 }, { "epoch": 0.749336067095566, "grad_norm": 0.864819347858429, "learning_rate": 5.016679725047051e-06, "loss": 2.693271446228027, "step": 92830 }, { "epoch": 0.7494167884213331, "grad_norm": 0.9312680959701538, "learning_rate": 5.015064255308838e-06, "loss": 2.668765830993652, "step": 92840 }, { "epoch": 0.7494975097471, "grad_norm": 0.8206484317779541, "learning_rate": 5.0134487855706245e-06, "loss": 2.74869441986084, "step": 92850 }, { "epoch": 0.7495782310728671, "grad_norm": 1.1936311721801758, "learning_rate": 5.011833315832411e-06, "loss": 2.471513557434082, "step": 92860 }, { "epoch": 0.7496589523986342, "grad_norm": 0.6902677416801453, "learning_rate": 5.010217846094199e-06, "loss": 2.94649715423584, "step": 92870 }, { "epoch": 0.7497396737244012, "grad_norm": 0.5813250541687012, "learning_rate": 5.008602376355986e-06, "loss": 2.7518308639526365, "step": 92880 }, { "epoch": 0.7498203950501683, "grad_norm": 1.0066837072372437, "learning_rate": 5.006986906617773e-06, "loss": 2.8637058258056642, "step": 92890 }, { "epoch": 0.7499011163759354, "grad_norm": 0.976245641708374, "learning_rate": 5.0053714368795595e-06, "loss": 2.952859115600586, "step": 92900 }, { "epoch": 0.7499818377017025, "grad_norm": 0.9081425070762634, "learning_rate": 5.003755967141346e-06, "loss": 2.8133588790893556, "step": 92910 }, { "epoch": 0.7500625590274694, "grad_norm": 0.9490267634391785, "learning_rate": 5.002140497403133e-06, "loss": 2.547576904296875, "step": 92920 }, { "epoch": 0.7501432803532365, "grad_norm": 0.8255760073661804, "learning_rate": 5.00052502766492e-06, "loss": 2.7635919570922853, "step": 92930 }, { "epoch": 0.7502240016790036, "grad_norm": 1.1576910018920898, "learning_rate": 4.998909557926707e-06, "loss": 2.719432067871094, "step": 92940 }, { "epoch": 0.7503047230047706, "grad_norm": 0.7980551719665527, "learning_rate": 4.997294088188494e-06, "loss": 2.8340335845947267, "step": 92950 }, { "epoch": 0.7503854443305377, "grad_norm": 0.8438399434089661, "learning_rate": 4.995678618450281e-06, "loss": 2.6167354583740234, "step": 92960 }, { "epoch": 0.7504661656563048, "grad_norm": 0.7315702438354492, "learning_rate": 4.9940631487120675e-06, "loss": 2.792204666137695, "step": 92970 }, { "epoch": 0.7505468869820718, "grad_norm": 0.8277437090873718, "learning_rate": 4.992447678973854e-06, "loss": 2.6863834381103517, "step": 92980 }, { "epoch": 0.7506276083078388, "grad_norm": 0.8393320441246033, "learning_rate": 4.990832209235641e-06, "loss": 2.5407033920288087, "step": 92990 }, { "epoch": 0.7507083296336059, "grad_norm": 1.3151665925979614, "learning_rate": 4.989216739497428e-06, "loss": 2.5740671157836914, "step": 93000 }, { "epoch": 0.750789050959373, "grad_norm": 1.4731378555297852, "learning_rate": 4.987601269759215e-06, "loss": 2.524250602722168, "step": 93010 }, { "epoch": 0.75086977228514, "grad_norm": 1.147340178489685, "learning_rate": 4.985985800021002e-06, "loss": 2.3764671325683593, "step": 93020 }, { "epoch": 0.750950493610907, "grad_norm": 0.6169258952140808, "learning_rate": 4.9843703302827885e-06, "loss": 2.5668142318725584, "step": 93030 }, { "epoch": 0.7510312149366741, "grad_norm": 1.1927040815353394, "learning_rate": 4.982754860544575e-06, "loss": 3.033147621154785, "step": 93040 }, { "epoch": 0.7511119362624412, "grad_norm": 1.1412431001663208, "learning_rate": 4.981139390806362e-06, "loss": 2.8238641738891603, "step": 93050 }, { "epoch": 0.7511926575882082, "grad_norm": 0.589344322681427, "learning_rate": 4.979523921068149e-06, "loss": 2.842429542541504, "step": 93060 }, { "epoch": 0.7512733789139753, "grad_norm": 1.016184687614441, "learning_rate": 4.977908451329936e-06, "loss": 2.583940315246582, "step": 93070 }, { "epoch": 0.7513541002397424, "grad_norm": 0.7370269894599915, "learning_rate": 4.976292981591723e-06, "loss": 2.46588077545166, "step": 93080 }, { "epoch": 0.7514348215655093, "grad_norm": 0.9984588027000427, "learning_rate": 4.97467751185351e-06, "loss": 2.8800960540771485, "step": 93090 }, { "epoch": 0.7515155428912764, "grad_norm": 0.9000251889228821, "learning_rate": 4.9730620421152965e-06, "loss": 2.6402769088745117, "step": 93100 }, { "epoch": 0.7515962642170435, "grad_norm": 1.1574307680130005, "learning_rate": 4.971446572377083e-06, "loss": 2.4848968505859377, "step": 93110 }, { "epoch": 0.7516769855428106, "grad_norm": 1.337167501449585, "learning_rate": 4.96983110263887e-06, "loss": 2.6588178634643556, "step": 93120 }, { "epoch": 0.7517577068685776, "grad_norm": 1.2266566753387451, "learning_rate": 4.968215632900657e-06, "loss": 2.7883123397827148, "step": 93130 }, { "epoch": 0.7518384281943447, "grad_norm": 1.3942893743515015, "learning_rate": 4.966600163162444e-06, "loss": 2.263503837585449, "step": 93140 }, { "epoch": 0.7519191495201117, "grad_norm": 1.5247269868850708, "learning_rate": 4.964984693424231e-06, "loss": 3.1705846786499023, "step": 93150 }, { "epoch": 0.7519998708458788, "grad_norm": 0.7780817747116089, "learning_rate": 4.9633692236860175e-06, "loss": 2.785975456237793, "step": 93160 }, { "epoch": 0.7520805921716458, "grad_norm": 1.4650377035140991, "learning_rate": 4.961753753947804e-06, "loss": 2.6650955200195314, "step": 93170 }, { "epoch": 0.7521613134974129, "grad_norm": 0.7227349877357483, "learning_rate": 4.960138284209591e-06, "loss": 2.511891746520996, "step": 93180 }, { "epoch": 0.75224203482318, "grad_norm": 0.9524852633476257, "learning_rate": 4.958522814471378e-06, "loss": 2.6437856674194338, "step": 93190 }, { "epoch": 0.752322756148947, "grad_norm": 0.9085378050804138, "learning_rate": 4.956907344733165e-06, "loss": 2.049555206298828, "step": 93200 }, { "epoch": 0.752403477474714, "grad_norm": 0.6879594922065735, "learning_rate": 4.955291874994952e-06, "loss": 2.30137939453125, "step": 93210 }, { "epoch": 0.7524841988004811, "grad_norm": 0.8991504907608032, "learning_rate": 4.953676405256739e-06, "loss": 2.5000267028808594, "step": 93220 }, { "epoch": 0.7525649201262482, "grad_norm": 0.8297313451766968, "learning_rate": 4.9520609355185255e-06, "loss": 2.540912628173828, "step": 93230 }, { "epoch": 0.7526456414520152, "grad_norm": 0.8122484087944031, "learning_rate": 4.950445465780312e-06, "loss": 2.882141876220703, "step": 93240 }, { "epoch": 0.7527263627777823, "grad_norm": 1.3107833862304688, "learning_rate": 4.948829996042099e-06, "loss": 2.7997806549072264, "step": 93250 }, { "epoch": 0.7528070841035494, "grad_norm": 1.8658703565597534, "learning_rate": 4.947214526303886e-06, "loss": 2.742942237854004, "step": 93260 }, { "epoch": 0.7528878054293163, "grad_norm": 0.9818388223648071, "learning_rate": 4.945599056565673e-06, "loss": 2.516001319885254, "step": 93270 }, { "epoch": 0.7529685267550834, "grad_norm": 0.7982625365257263, "learning_rate": 4.94398358682746e-06, "loss": 2.7227407455444337, "step": 93280 }, { "epoch": 0.7530492480808505, "grad_norm": 0.7489678859710693, "learning_rate": 4.9423681170892465e-06, "loss": 2.5137775421142576, "step": 93290 }, { "epoch": 0.7531299694066176, "grad_norm": 0.7322260737419128, "learning_rate": 4.940752647351033e-06, "loss": 2.4993209838867188, "step": 93300 }, { "epoch": 0.7532106907323846, "grad_norm": 1.2007311582565308, "learning_rate": 4.93913717761282e-06, "loss": 2.8627132415771483, "step": 93310 }, { "epoch": 0.7532914120581516, "grad_norm": 0.9582931995391846, "learning_rate": 4.937521707874607e-06, "loss": 2.185626411437988, "step": 93320 }, { "epoch": 0.7533721333839187, "grad_norm": 0.8800129890441895, "learning_rate": 4.935906238136394e-06, "loss": 2.608880805969238, "step": 93330 }, { "epoch": 0.7534528547096857, "grad_norm": 1.237629771232605, "learning_rate": 4.934290768398181e-06, "loss": 2.827829933166504, "step": 93340 }, { "epoch": 0.7535335760354528, "grad_norm": 1.1659436225891113, "learning_rate": 4.9326752986599685e-06, "loss": 2.4354991912841797, "step": 93350 }, { "epoch": 0.7536142973612199, "grad_norm": 0.956753134727478, "learning_rate": 4.931059828921755e-06, "loss": 2.6368745803833007, "step": 93360 }, { "epoch": 0.753695018686987, "grad_norm": 0.6360783576965332, "learning_rate": 4.929444359183542e-06, "loss": 2.36199951171875, "step": 93370 }, { "epoch": 0.7537757400127539, "grad_norm": 1.1304084062576294, "learning_rate": 4.927828889445329e-06, "loss": 2.710901069641113, "step": 93380 }, { "epoch": 0.753856461338521, "grad_norm": 0.8318954706192017, "learning_rate": 4.926213419707116e-06, "loss": 2.5621395111083984, "step": 93390 }, { "epoch": 0.7539371826642881, "grad_norm": 1.0305825471878052, "learning_rate": 4.924597949968903e-06, "loss": 2.752268600463867, "step": 93400 }, { "epoch": 0.7540179039900551, "grad_norm": 0.8596271276473999, "learning_rate": 4.9229824802306895e-06, "loss": 2.7143211364746094, "step": 93410 }, { "epoch": 0.7540986253158222, "grad_norm": 0.6847639083862305, "learning_rate": 4.921367010492476e-06, "loss": 2.3181461334228515, "step": 93420 }, { "epoch": 0.7541793466415893, "grad_norm": 0.7300137281417847, "learning_rate": 4.919751540754263e-06, "loss": 2.858185577392578, "step": 93430 }, { "epoch": 0.7542600679673563, "grad_norm": 1.104982614517212, "learning_rate": 4.91813607101605e-06, "loss": 2.444381523132324, "step": 93440 }, { "epoch": 0.7543407892931233, "grad_norm": 0.6791404485702515, "learning_rate": 4.916520601277837e-06, "loss": 2.5228158950805666, "step": 93450 }, { "epoch": 0.7544215106188904, "grad_norm": 1.001488208770752, "learning_rate": 4.914905131539624e-06, "loss": 2.4802345275878905, "step": 93460 }, { "epoch": 0.7545022319446575, "grad_norm": 1.4155112504959106, "learning_rate": 4.913289661801411e-06, "loss": 2.693894386291504, "step": 93470 }, { "epoch": 0.7545829532704245, "grad_norm": 0.6512417197227478, "learning_rate": 4.9116741920631974e-06, "loss": 2.81806640625, "step": 93480 }, { "epoch": 0.7546636745961915, "grad_norm": 1.0409246683120728, "learning_rate": 4.910058722324984e-06, "loss": 2.8204538345336916, "step": 93490 }, { "epoch": 0.7547443959219586, "grad_norm": 0.9325963258743286, "learning_rate": 4.908443252586771e-06, "loss": 2.596813201904297, "step": 93500 }, { "epoch": 0.7548251172477257, "grad_norm": 0.8397398591041565, "learning_rate": 4.906827782848558e-06, "loss": 2.7185136795043947, "step": 93510 }, { "epoch": 0.7549058385734927, "grad_norm": 0.6491996645927429, "learning_rate": 4.905212313110345e-06, "loss": 2.723362922668457, "step": 93520 }, { "epoch": 0.7549865598992598, "grad_norm": 1.0672556161880493, "learning_rate": 4.903596843372132e-06, "loss": 3.085776138305664, "step": 93530 }, { "epoch": 0.7550672812250269, "grad_norm": 0.8257390260696411, "learning_rate": 4.9019813736339185e-06, "loss": 2.8541101455688476, "step": 93540 }, { "epoch": 0.7551480025507938, "grad_norm": 1.1166095733642578, "learning_rate": 4.900365903895705e-06, "loss": 2.3130191802978515, "step": 93550 }, { "epoch": 0.7552287238765609, "grad_norm": 0.7154578566551208, "learning_rate": 4.898750434157492e-06, "loss": 2.922150421142578, "step": 93560 }, { "epoch": 0.755309445202328, "grad_norm": 0.895875096321106, "learning_rate": 4.89713496441928e-06, "loss": 2.6234649658203124, "step": 93570 }, { "epoch": 0.7553901665280951, "grad_norm": 1.1031908988952637, "learning_rate": 4.895519494681067e-06, "loss": 2.4457006454467773, "step": 93580 }, { "epoch": 0.7554708878538621, "grad_norm": 0.8845851421356201, "learning_rate": 4.893904024942854e-06, "loss": 2.723529052734375, "step": 93590 }, { "epoch": 0.7555516091796292, "grad_norm": 0.9107706546783447, "learning_rate": 4.8922885552046404e-06, "loss": 2.711527633666992, "step": 93600 }, { "epoch": 0.7556323305053962, "grad_norm": 1.2843824625015259, "learning_rate": 4.890673085466427e-06, "loss": 2.458120918273926, "step": 93610 }, { "epoch": 0.7557130518311633, "grad_norm": 0.7542784810066223, "learning_rate": 4.889057615728214e-06, "loss": 2.707992935180664, "step": 93620 }, { "epoch": 0.7557937731569303, "grad_norm": 0.9478172063827515, "learning_rate": 4.887442145990001e-06, "loss": 2.529329299926758, "step": 93630 }, { "epoch": 0.7558744944826974, "grad_norm": 1.3192909955978394, "learning_rate": 4.885826676251788e-06, "loss": 3.1610998153686523, "step": 93640 }, { "epoch": 0.7559552158084645, "grad_norm": 0.5971805453300476, "learning_rate": 4.884211206513575e-06, "loss": 2.5309978485107423, "step": 93650 }, { "epoch": 0.7560359371342315, "grad_norm": 0.6630522012710571, "learning_rate": 4.8825957367753615e-06, "loss": 2.5343603134155273, "step": 93660 }, { "epoch": 0.7561166584599985, "grad_norm": 0.5685423016548157, "learning_rate": 4.880980267037148e-06, "loss": 2.1562191009521485, "step": 93670 }, { "epoch": 0.7561973797857656, "grad_norm": 0.9431605339050293, "learning_rate": 4.879364797298935e-06, "loss": 2.8475101470947264, "step": 93680 }, { "epoch": 0.7562781011115327, "grad_norm": 1.038400650024414, "learning_rate": 4.877749327560722e-06, "loss": 2.605438232421875, "step": 93690 }, { "epoch": 0.7563588224372997, "grad_norm": 0.8947202563285828, "learning_rate": 4.876133857822509e-06, "loss": 3.0643762588500976, "step": 93700 }, { "epoch": 0.7564395437630668, "grad_norm": 1.1064531803131104, "learning_rate": 4.874518388084296e-06, "loss": 2.465117263793945, "step": 93710 }, { "epoch": 0.7565202650888339, "grad_norm": 0.8959358334541321, "learning_rate": 4.872902918346083e-06, "loss": 2.445574951171875, "step": 93720 }, { "epoch": 0.7566009864146008, "grad_norm": 0.9255631566047668, "learning_rate": 4.8712874486078694e-06, "loss": 2.2988954544067384, "step": 93730 }, { "epoch": 0.7566817077403679, "grad_norm": 1.286860704421997, "learning_rate": 4.869671978869656e-06, "loss": 2.5592325210571287, "step": 93740 }, { "epoch": 0.756762429066135, "grad_norm": 2.1406240463256836, "learning_rate": 4.868056509131443e-06, "loss": 2.4181711196899416, "step": 93750 }, { "epoch": 0.7568431503919021, "grad_norm": 0.9226115942001343, "learning_rate": 4.86644103939323e-06, "loss": 2.6635986328125, "step": 93760 }, { "epoch": 0.7569238717176691, "grad_norm": 1.0220863819122314, "learning_rate": 4.864825569655017e-06, "loss": 2.8776165008544923, "step": 93770 }, { "epoch": 0.7570045930434361, "grad_norm": 0.9155293107032776, "learning_rate": 4.863210099916804e-06, "loss": 2.517546844482422, "step": 93780 }, { "epoch": 0.7570853143692032, "grad_norm": 1.4605637788772583, "learning_rate": 4.8615946301785905e-06, "loss": 2.9304935455322267, "step": 93790 }, { "epoch": 0.7571660356949702, "grad_norm": 1.3246307373046875, "learning_rate": 4.859979160440377e-06, "loss": 2.6313102722167967, "step": 93800 }, { "epoch": 0.7572467570207373, "grad_norm": 0.7682738304138184, "learning_rate": 4.858363690702164e-06, "loss": 2.3720516204833983, "step": 93810 }, { "epoch": 0.7573274783465044, "grad_norm": 0.7861284017562866, "learning_rate": 4.856748220963951e-06, "loss": 2.5785554885864257, "step": 93820 }, { "epoch": 0.7574081996722715, "grad_norm": 0.9070462584495544, "learning_rate": 4.855132751225738e-06, "loss": 2.7131771087646483, "step": 93830 }, { "epoch": 0.7574889209980384, "grad_norm": 1.0653250217437744, "learning_rate": 4.853517281487525e-06, "loss": 2.332223129272461, "step": 93840 }, { "epoch": 0.7575696423238055, "grad_norm": 0.9482914209365845, "learning_rate": 4.851901811749312e-06, "loss": 2.349517250061035, "step": 93850 }, { "epoch": 0.7576503636495726, "grad_norm": 1.3809036016464233, "learning_rate": 4.8502863420110984e-06, "loss": 2.651092529296875, "step": 93860 }, { "epoch": 0.7577310849753396, "grad_norm": 1.0865696668624878, "learning_rate": 4.848670872272885e-06, "loss": 3.0127519607543944, "step": 93870 }, { "epoch": 0.7578118063011067, "grad_norm": 0.6517948508262634, "learning_rate": 4.847055402534672e-06, "loss": 2.9686866760253907, "step": 93880 }, { "epoch": 0.7578925276268738, "grad_norm": 0.9479278922080994, "learning_rate": 4.845439932796459e-06, "loss": 2.7348186492919924, "step": 93890 }, { "epoch": 0.7579732489526408, "grad_norm": 1.007745385169983, "learning_rate": 4.843824463058246e-06, "loss": 2.859954833984375, "step": 93900 }, { "epoch": 0.7580539702784078, "grad_norm": 0.7784778475761414, "learning_rate": 4.842208993320033e-06, "loss": 2.502188873291016, "step": 93910 }, { "epoch": 0.7581346916041749, "grad_norm": 1.2869950532913208, "learning_rate": 4.8405935235818195e-06, "loss": 3.6770931243896485, "step": 93920 }, { "epoch": 0.758215412929942, "grad_norm": 1.1880476474761963, "learning_rate": 4.838978053843606e-06, "loss": 2.7082851409912108, "step": 93930 }, { "epoch": 0.758296134255709, "grad_norm": 0.9493350386619568, "learning_rate": 4.837362584105393e-06, "loss": 2.3282934188842774, "step": 93940 }, { "epoch": 0.758376855581476, "grad_norm": 0.5747000575065613, "learning_rate": 4.83574711436718e-06, "loss": 2.2378549575805664, "step": 93950 }, { "epoch": 0.7584575769072431, "grad_norm": 0.6675050258636475, "learning_rate": 4.834131644628967e-06, "loss": 2.2578266143798826, "step": 93960 }, { "epoch": 0.7585382982330102, "grad_norm": 0.8585193753242493, "learning_rate": 4.832516174890755e-06, "loss": 2.509425926208496, "step": 93970 }, { "epoch": 0.7586190195587772, "grad_norm": 1.0819729566574097, "learning_rate": 4.8309007051525414e-06, "loss": 3.620648956298828, "step": 93980 }, { "epoch": 0.7586997408845443, "grad_norm": 1.0029586553573608, "learning_rate": 4.829285235414328e-06, "loss": 2.693879318237305, "step": 93990 }, { "epoch": 0.7587804622103114, "grad_norm": 0.9394513964653015, "learning_rate": 4.827669765676115e-06, "loss": 2.4285348892211913, "step": 94000 }, { "epoch": 0.7588611835360783, "grad_norm": 0.9547600746154785, "learning_rate": 4.826054295937902e-06, "loss": 2.4047924041748048, "step": 94010 }, { "epoch": 0.7589419048618454, "grad_norm": 1.4927349090576172, "learning_rate": 4.824438826199689e-06, "loss": 2.988292694091797, "step": 94020 }, { "epoch": 0.7590226261876125, "grad_norm": 0.8461482524871826, "learning_rate": 4.822823356461476e-06, "loss": 2.6000171661376954, "step": 94030 }, { "epoch": 0.7591033475133796, "grad_norm": 0.6059439778327942, "learning_rate": 4.8212078867232625e-06, "loss": 2.694595527648926, "step": 94040 }, { "epoch": 0.7591840688391466, "grad_norm": 0.8186408877372742, "learning_rate": 4.819592416985049e-06, "loss": 2.6657636642456053, "step": 94050 }, { "epoch": 0.7592647901649137, "grad_norm": 0.8332732319831848, "learning_rate": 4.817976947246836e-06, "loss": 2.485665702819824, "step": 94060 }, { "epoch": 0.7593455114906807, "grad_norm": 0.9712963700294495, "learning_rate": 4.816361477508623e-06, "loss": 2.7082492828369142, "step": 94070 }, { "epoch": 0.7594262328164478, "grad_norm": 0.9652090668678284, "learning_rate": 4.81474600777041e-06, "loss": 2.7608062744140627, "step": 94080 }, { "epoch": 0.7595069541422148, "grad_norm": 1.1080266237258911, "learning_rate": 4.813130538032197e-06, "loss": 2.825033187866211, "step": 94090 }, { "epoch": 0.7595876754679819, "grad_norm": 0.6656793355941772, "learning_rate": 4.8115150682939836e-06, "loss": 2.373196029663086, "step": 94100 }, { "epoch": 0.759668396793749, "grad_norm": 0.6547345519065857, "learning_rate": 4.80989959855577e-06, "loss": 2.704914855957031, "step": 94110 }, { "epoch": 0.759749118119516, "grad_norm": 1.36741304397583, "learning_rate": 4.808284128817557e-06, "loss": 3.105461311340332, "step": 94120 }, { "epoch": 0.759829839445283, "grad_norm": 0.6558690071105957, "learning_rate": 4.806668659079344e-06, "loss": 2.642422676086426, "step": 94130 }, { "epoch": 0.7599105607710501, "grad_norm": 1.7181446552276611, "learning_rate": 4.805053189341131e-06, "loss": 2.4395124435424806, "step": 94140 }, { "epoch": 0.7599912820968172, "grad_norm": 0.9810934066772461, "learning_rate": 4.803437719602918e-06, "loss": 2.337901496887207, "step": 94150 }, { "epoch": 0.7600720034225842, "grad_norm": 0.8958306312561035, "learning_rate": 4.801822249864705e-06, "loss": 2.9614032745361327, "step": 94160 }, { "epoch": 0.7601527247483513, "grad_norm": 1.1812108755111694, "learning_rate": 4.8002067801264915e-06, "loss": 2.514015769958496, "step": 94170 }, { "epoch": 0.7602334460741184, "grad_norm": 1.3463764190673828, "learning_rate": 4.798591310388278e-06, "loss": 2.3525197982788084, "step": 94180 }, { "epoch": 0.7603141673998853, "grad_norm": 1.16257643699646, "learning_rate": 4.796975840650065e-06, "loss": 2.5757652282714845, "step": 94190 }, { "epoch": 0.7603948887256524, "grad_norm": 1.151898741722107, "learning_rate": 4.795360370911852e-06, "loss": 2.285270118713379, "step": 94200 }, { "epoch": 0.7604756100514195, "grad_norm": 1.146098017692566, "learning_rate": 4.793744901173639e-06, "loss": 2.6889328002929687, "step": 94210 }, { "epoch": 0.7605563313771866, "grad_norm": 1.0595862865447998, "learning_rate": 4.792129431435426e-06, "loss": 2.667365074157715, "step": 94220 }, { "epoch": 0.7606370527029536, "grad_norm": 0.6966636180877686, "learning_rate": 4.7905139616972126e-06, "loss": 2.3371959686279298, "step": 94230 }, { "epoch": 0.7607177740287207, "grad_norm": 0.729213297367096, "learning_rate": 4.788898491958999e-06, "loss": 2.31585636138916, "step": 94240 }, { "epoch": 0.7607984953544877, "grad_norm": 1.1174557209014893, "learning_rate": 4.787283022220786e-06, "loss": 2.8376754760742187, "step": 94250 }, { "epoch": 0.7608792166802547, "grad_norm": 0.9828052520751953, "learning_rate": 4.785667552482574e-06, "loss": 2.59818115234375, "step": 94260 }, { "epoch": 0.7609599380060218, "grad_norm": 0.5517005324363708, "learning_rate": 4.784052082744361e-06, "loss": 2.6617544174194334, "step": 94270 }, { "epoch": 0.7610406593317889, "grad_norm": 1.4852782487869263, "learning_rate": 4.782436613006148e-06, "loss": 2.7701894760131838, "step": 94280 }, { "epoch": 0.761121380657556, "grad_norm": 1.3278290033340454, "learning_rate": 4.7808211432679345e-06, "loss": 2.5603096008300783, "step": 94290 }, { "epoch": 0.7612021019833229, "grad_norm": 0.5585957765579224, "learning_rate": 4.779205673529721e-06, "loss": 2.486445999145508, "step": 94300 }, { "epoch": 0.76128282330909, "grad_norm": 1.4036569595336914, "learning_rate": 4.777590203791508e-06, "loss": 2.5430576324462892, "step": 94310 }, { "epoch": 0.7613635446348571, "grad_norm": 1.097784399986267, "learning_rate": 4.775974734053295e-06, "loss": 2.6554586410522463, "step": 94320 }, { "epoch": 0.7614442659606241, "grad_norm": 1.1116318702697754, "learning_rate": 4.774359264315082e-06, "loss": 2.6678009033203125, "step": 94330 }, { "epoch": 0.7615249872863912, "grad_norm": 0.7332374453544617, "learning_rate": 4.772743794576869e-06, "loss": 2.8258771896362305, "step": 94340 }, { "epoch": 0.7616057086121583, "grad_norm": 0.8132504224777222, "learning_rate": 4.7711283248386556e-06, "loss": 2.801613426208496, "step": 94350 }, { "epoch": 0.7616864299379253, "grad_norm": 1.2783068418502808, "learning_rate": 4.769512855100442e-06, "loss": 2.470175933837891, "step": 94360 }, { "epoch": 0.7617671512636923, "grad_norm": 1.1112881898880005, "learning_rate": 4.767897385362229e-06, "loss": 2.094938850402832, "step": 94370 }, { "epoch": 0.7618478725894594, "grad_norm": 1.1007654666900635, "learning_rate": 4.766281915624016e-06, "loss": 2.6968427658081056, "step": 94380 }, { "epoch": 0.7619285939152265, "grad_norm": 1.237166166305542, "learning_rate": 4.764666445885803e-06, "loss": 2.6183650970458983, "step": 94390 }, { "epoch": 0.7620093152409935, "grad_norm": 0.926946759223938, "learning_rate": 4.76305097614759e-06, "loss": 2.5545251846313475, "step": 94400 }, { "epoch": 0.7620900365667606, "grad_norm": 0.6717908382415771, "learning_rate": 4.761435506409377e-06, "loss": 2.465309906005859, "step": 94410 }, { "epoch": 0.7621707578925276, "grad_norm": 1.1148855686187744, "learning_rate": 4.7598200366711635e-06, "loss": 2.974329948425293, "step": 94420 }, { "epoch": 0.7622514792182947, "grad_norm": 1.2178003787994385, "learning_rate": 4.75820456693295e-06, "loss": 2.2250188827514648, "step": 94430 }, { "epoch": 0.7623322005440617, "grad_norm": 0.9814984798431396, "learning_rate": 4.756589097194737e-06, "loss": 2.8850404739379885, "step": 94440 }, { "epoch": 0.7624129218698288, "grad_norm": 0.6405937671661377, "learning_rate": 4.754973627456524e-06, "loss": 2.6929664611816406, "step": 94450 }, { "epoch": 0.7624936431955959, "grad_norm": 1.5364818572998047, "learning_rate": 4.753358157718311e-06, "loss": 2.3808483123779296, "step": 94460 }, { "epoch": 0.7625743645213628, "grad_norm": 1.0829713344573975, "learning_rate": 4.751742687980098e-06, "loss": 2.6001075744628905, "step": 94470 }, { "epoch": 0.7626550858471299, "grad_norm": 0.6260270476341248, "learning_rate": 4.7501272182418846e-06, "loss": 2.6961536407470703, "step": 94480 }, { "epoch": 0.762735807172897, "grad_norm": 0.6899157166481018, "learning_rate": 4.748511748503671e-06, "loss": 2.3204545974731445, "step": 94490 }, { "epoch": 0.7628165284986641, "grad_norm": 1.9250602722167969, "learning_rate": 4.746896278765458e-06, "loss": 2.358333969116211, "step": 94500 }, { "epoch": 0.7628972498244311, "grad_norm": 0.9716091156005859, "learning_rate": 4.745280809027245e-06, "loss": 3.094569778442383, "step": 94510 }, { "epoch": 0.7629779711501982, "grad_norm": 0.9156559109687805, "learning_rate": 4.743665339289032e-06, "loss": 2.5935522079467774, "step": 94520 }, { "epoch": 0.7630586924759653, "grad_norm": 0.7710222005844116, "learning_rate": 4.742049869550819e-06, "loss": 2.4846033096313476, "step": 94530 }, { "epoch": 0.7631394138017322, "grad_norm": 0.7119225859642029, "learning_rate": 4.740434399812606e-06, "loss": 2.83538703918457, "step": 94540 }, { "epoch": 0.7632201351274993, "grad_norm": 0.7014706134796143, "learning_rate": 4.7388189300743925e-06, "loss": 2.8169445037841796, "step": 94550 }, { "epoch": 0.7633008564532664, "grad_norm": 0.847667932510376, "learning_rate": 4.737203460336179e-06, "loss": 2.3648550033569338, "step": 94560 }, { "epoch": 0.7633815777790335, "grad_norm": 0.8766056895256042, "learning_rate": 4.735587990597966e-06, "loss": 2.8263982772827148, "step": 94570 }, { "epoch": 0.7634622991048005, "grad_norm": 1.0336520671844482, "learning_rate": 4.733972520859753e-06, "loss": 2.103046417236328, "step": 94580 }, { "epoch": 0.7635430204305675, "grad_norm": 1.1601901054382324, "learning_rate": 4.73235705112154e-06, "loss": 3.020536994934082, "step": 94590 }, { "epoch": 0.7636237417563346, "grad_norm": 1.155319094657898, "learning_rate": 4.7307415813833276e-06, "loss": 2.5865140914916993, "step": 94600 }, { "epoch": 0.7637044630821017, "grad_norm": 0.8503178358078003, "learning_rate": 4.729126111645114e-06, "loss": 2.664478874206543, "step": 94610 }, { "epoch": 0.7637851844078687, "grad_norm": 0.9662119150161743, "learning_rate": 4.727510641906901e-06, "loss": 2.5132120132446287, "step": 94620 }, { "epoch": 0.7638659057336358, "grad_norm": 1.6689159870147705, "learning_rate": 4.725895172168688e-06, "loss": 2.875299835205078, "step": 94630 }, { "epoch": 0.7639466270594029, "grad_norm": 1.3480894565582275, "learning_rate": 4.724279702430475e-06, "loss": 2.5439857482910155, "step": 94640 }, { "epoch": 0.7640273483851698, "grad_norm": 1.3567543029785156, "learning_rate": 4.722664232692262e-06, "loss": 2.7229969024658205, "step": 94650 }, { "epoch": 0.7641080697109369, "grad_norm": 1.1435832977294922, "learning_rate": 4.721048762954049e-06, "loss": 2.4665624618530275, "step": 94660 }, { "epoch": 0.764188791036704, "grad_norm": 1.429452657699585, "learning_rate": 4.7194332932158355e-06, "loss": 2.6044218063354494, "step": 94670 }, { "epoch": 0.7642695123624711, "grad_norm": 0.8046675324440002, "learning_rate": 4.717817823477622e-06, "loss": 2.642980766296387, "step": 94680 }, { "epoch": 0.7643502336882381, "grad_norm": 0.7511782050132751, "learning_rate": 4.716202353739409e-06, "loss": 3.1431983947753905, "step": 94690 }, { "epoch": 0.7644309550140052, "grad_norm": 1.0724784135818481, "learning_rate": 4.714586884001196e-06, "loss": 2.918770980834961, "step": 94700 }, { "epoch": 0.7645116763397722, "grad_norm": 2.6089417934417725, "learning_rate": 4.712971414262983e-06, "loss": 2.6737823486328125, "step": 94710 }, { "epoch": 0.7645923976655392, "grad_norm": 0.8987176418304443, "learning_rate": 4.71135594452477e-06, "loss": 2.3544033050537108, "step": 94720 }, { "epoch": 0.7646731189913063, "grad_norm": 1.4833202362060547, "learning_rate": 4.7097404747865565e-06, "loss": 2.64013614654541, "step": 94730 }, { "epoch": 0.7647538403170734, "grad_norm": 0.7149755358695984, "learning_rate": 4.708125005048343e-06, "loss": 2.762241554260254, "step": 94740 }, { "epoch": 0.7648345616428405, "grad_norm": 0.7645449042320251, "learning_rate": 4.70650953531013e-06, "loss": 2.309453582763672, "step": 94750 }, { "epoch": 0.7649152829686074, "grad_norm": 0.7896018624305725, "learning_rate": 4.704894065571917e-06, "loss": 2.6606298446655274, "step": 94760 }, { "epoch": 0.7649960042943745, "grad_norm": 0.7106930613517761, "learning_rate": 4.703278595833704e-06, "loss": 2.2464410781860353, "step": 94770 }, { "epoch": 0.7650767256201416, "grad_norm": 0.595331609249115, "learning_rate": 4.701663126095491e-06, "loss": 2.1361616134643553, "step": 94780 }, { "epoch": 0.7651574469459086, "grad_norm": 0.9187968969345093, "learning_rate": 4.700047656357278e-06, "loss": 2.304977226257324, "step": 94790 }, { "epoch": 0.7652381682716757, "grad_norm": 0.957525372505188, "learning_rate": 4.6984321866190645e-06, "loss": 2.526058387756348, "step": 94800 }, { "epoch": 0.7653188895974428, "grad_norm": 1.0443675518035889, "learning_rate": 4.696816716880851e-06, "loss": 2.7182615280151365, "step": 94810 }, { "epoch": 0.7653996109232098, "grad_norm": 1.2127643823623657, "learning_rate": 4.695201247142638e-06, "loss": 2.5289722442626954, "step": 94820 }, { "epoch": 0.7654803322489768, "grad_norm": 1.3210843801498413, "learning_rate": 4.693585777404425e-06, "loss": 2.5832098007202147, "step": 94830 }, { "epoch": 0.7655610535747439, "grad_norm": 0.8951432108879089, "learning_rate": 4.691970307666212e-06, "loss": 2.789084625244141, "step": 94840 }, { "epoch": 0.765641774900511, "grad_norm": 0.6927184462547302, "learning_rate": 4.690354837927999e-06, "loss": 2.405117225646973, "step": 94850 }, { "epoch": 0.765722496226278, "grad_norm": 1.014597773551941, "learning_rate": 4.6887393681897855e-06, "loss": 2.5533512115478514, "step": 94860 }, { "epoch": 0.7658032175520451, "grad_norm": 1.1199616193771362, "learning_rate": 4.687123898451572e-06, "loss": 2.5644510269165037, "step": 94870 }, { "epoch": 0.7658839388778121, "grad_norm": 1.0004674196243286, "learning_rate": 4.685508428713359e-06, "loss": 2.657440185546875, "step": 94880 }, { "epoch": 0.7659646602035792, "grad_norm": 1.1542099714279175, "learning_rate": 4.683892958975146e-06, "loss": 2.9485733032226564, "step": 94890 }, { "epoch": 0.7660453815293462, "grad_norm": 0.9024749994277954, "learning_rate": 4.682277489236933e-06, "loss": 2.3685617446899414, "step": 94900 }, { "epoch": 0.7661261028551133, "grad_norm": 0.5437331795692444, "learning_rate": 4.68066201949872e-06, "loss": 2.5013355255126952, "step": 94910 }, { "epoch": 0.7662068241808804, "grad_norm": 0.8092525005340576, "learning_rate": 4.679046549760507e-06, "loss": 2.5374269485473633, "step": 94920 }, { "epoch": 0.7662875455066473, "grad_norm": 1.020750880241394, "learning_rate": 4.6774310800222935e-06, "loss": 2.8420894622802733, "step": 94930 }, { "epoch": 0.7663682668324144, "grad_norm": 0.9916203618049622, "learning_rate": 4.67581561028408e-06, "loss": 3.0717142105102537, "step": 94940 }, { "epoch": 0.7664489881581815, "grad_norm": 1.1971849203109741, "learning_rate": 4.674200140545867e-06, "loss": 2.5187841415405274, "step": 94950 }, { "epoch": 0.7665297094839486, "grad_norm": 1.0905060768127441, "learning_rate": 4.672584670807655e-06, "loss": 2.6260814666748047, "step": 94960 }, { "epoch": 0.7666104308097156, "grad_norm": 1.1423819065093994, "learning_rate": 4.670969201069442e-06, "loss": 2.5827951431274414, "step": 94970 }, { "epoch": 0.7666911521354827, "grad_norm": 1.2869864702224731, "learning_rate": 4.6693537313312285e-06, "loss": 2.935895538330078, "step": 94980 }, { "epoch": 0.7667718734612498, "grad_norm": 0.7262299060821533, "learning_rate": 4.667738261593015e-06, "loss": 2.3049127578735353, "step": 94990 }, { "epoch": 0.7668525947870167, "grad_norm": 0.6615192294120789, "learning_rate": 4.666122791854802e-06, "loss": 2.6981992721557617, "step": 95000 }, { "epoch": 0.7669333161127838, "grad_norm": 1.2058038711547852, "learning_rate": 4.664507322116589e-06, "loss": 2.6919525146484373, "step": 95010 }, { "epoch": 0.7670140374385509, "grad_norm": 1.7655494213104248, "learning_rate": 4.662891852378376e-06, "loss": 3.090729904174805, "step": 95020 }, { "epoch": 0.767094758764318, "grad_norm": 0.7254607677459717, "learning_rate": 4.661276382640163e-06, "loss": 2.696489715576172, "step": 95030 }, { "epoch": 0.767175480090085, "grad_norm": 1.152496576309204, "learning_rate": 4.65966091290195e-06, "loss": 2.7873512268066407, "step": 95040 }, { "epoch": 0.767256201415852, "grad_norm": 4.8879075050354, "learning_rate": 4.6580454431637365e-06, "loss": 3.405927276611328, "step": 95050 }, { "epoch": 0.7673369227416191, "grad_norm": 0.9536574482917786, "learning_rate": 4.656429973425523e-06, "loss": 2.317808723449707, "step": 95060 }, { "epoch": 0.7674176440673862, "grad_norm": 0.6853765249252319, "learning_rate": 4.65481450368731e-06, "loss": 2.676237106323242, "step": 95070 }, { "epoch": 0.7674983653931532, "grad_norm": 0.5771580338478088, "learning_rate": 4.653199033949097e-06, "loss": 2.45894775390625, "step": 95080 }, { "epoch": 0.7675790867189203, "grad_norm": 1.0689706802368164, "learning_rate": 4.651583564210884e-06, "loss": 2.6389032363891602, "step": 95090 }, { "epoch": 0.7676598080446874, "grad_norm": 0.7330049872398376, "learning_rate": 4.649968094472671e-06, "loss": 2.564304733276367, "step": 95100 }, { "epoch": 0.7677405293704543, "grad_norm": 1.0450605154037476, "learning_rate": 4.6483526247344575e-06, "loss": 2.8244741439819334, "step": 95110 }, { "epoch": 0.7678212506962214, "grad_norm": 0.8225381374359131, "learning_rate": 4.646737154996244e-06, "loss": 2.472249412536621, "step": 95120 }, { "epoch": 0.7679019720219885, "grad_norm": 1.000156283378601, "learning_rate": 4.645121685258031e-06, "loss": 2.4239995956420897, "step": 95130 }, { "epoch": 0.7679826933477556, "grad_norm": 1.1233779191970825, "learning_rate": 4.643506215519818e-06, "loss": 2.7439950942993163, "step": 95140 }, { "epoch": 0.7680634146735226, "grad_norm": 0.7441887259483337, "learning_rate": 4.641890745781605e-06, "loss": 2.651327896118164, "step": 95150 }, { "epoch": 0.7681441359992897, "grad_norm": 0.5658956170082092, "learning_rate": 4.640275276043392e-06, "loss": 2.3727752685546877, "step": 95160 }, { "epoch": 0.7682248573250567, "grad_norm": 0.7547391653060913, "learning_rate": 4.638659806305179e-06, "loss": 2.4962156295776365, "step": 95170 }, { "epoch": 0.7683055786508237, "grad_norm": 0.852538526058197, "learning_rate": 4.6370443365669654e-06, "loss": 2.5796798706054687, "step": 95180 }, { "epoch": 0.7683862999765908, "grad_norm": 0.7493690848350525, "learning_rate": 4.635428866828752e-06, "loss": 2.9387136459350587, "step": 95190 }, { "epoch": 0.7684670213023579, "grad_norm": 0.9281145334243774, "learning_rate": 4.633813397090539e-06, "loss": 2.7004714965820313, "step": 95200 }, { "epoch": 0.768547742628125, "grad_norm": 0.804215133190155, "learning_rate": 4.632197927352326e-06, "loss": 2.3346750259399416, "step": 95210 }, { "epoch": 0.768628463953892, "grad_norm": 1.4345160722732544, "learning_rate": 4.630582457614113e-06, "loss": 2.7014421463012694, "step": 95220 }, { "epoch": 0.768709185279659, "grad_norm": 1.303819179534912, "learning_rate": 4.6289669878759005e-06, "loss": 3.0032302856445314, "step": 95230 }, { "epoch": 0.7687899066054261, "grad_norm": 0.9151132702827454, "learning_rate": 4.627351518137687e-06, "loss": 2.236872673034668, "step": 95240 }, { "epoch": 0.7688706279311931, "grad_norm": 0.9502250552177429, "learning_rate": 4.625736048399474e-06, "loss": 2.7461496353149415, "step": 95250 }, { "epoch": 0.7689513492569602, "grad_norm": 1.1181317567825317, "learning_rate": 4.624120578661261e-06, "loss": 2.6023317337036134, "step": 95260 }, { "epoch": 0.7690320705827273, "grad_norm": 0.735555112361908, "learning_rate": 4.622505108923048e-06, "loss": 2.66611328125, "step": 95270 }, { "epoch": 0.7691127919084944, "grad_norm": 0.5411822199821472, "learning_rate": 4.620889639184835e-06, "loss": 2.6786155700683594, "step": 95280 }, { "epoch": 0.7691935132342613, "grad_norm": 0.8123780488967896, "learning_rate": 4.619274169446622e-06, "loss": 2.5425519943237305, "step": 95290 }, { "epoch": 0.7692742345600284, "grad_norm": 0.8397613763809204, "learning_rate": 4.6176586997084084e-06, "loss": 2.4186227798461912, "step": 95300 }, { "epoch": 0.7693549558857955, "grad_norm": 0.9879968166351318, "learning_rate": 4.616043229970195e-06, "loss": 2.6633832931518553, "step": 95310 }, { "epoch": 0.7694356772115625, "grad_norm": 0.9451721906661987, "learning_rate": 4.614427760231982e-06, "loss": 2.465974235534668, "step": 95320 }, { "epoch": 0.7695163985373296, "grad_norm": 0.7431235313415527, "learning_rate": 4.612812290493769e-06, "loss": 2.6399694442749024, "step": 95330 }, { "epoch": 0.7695971198630966, "grad_norm": 0.6514813899993896, "learning_rate": 4.611196820755556e-06, "loss": 2.862777900695801, "step": 95340 }, { "epoch": 0.7696778411888637, "grad_norm": 1.4172141551971436, "learning_rate": 4.609581351017343e-06, "loss": 3.060713195800781, "step": 95350 }, { "epoch": 0.7697585625146307, "grad_norm": 1.2395583391189575, "learning_rate": 4.6079658812791295e-06, "loss": 2.23446044921875, "step": 95360 }, { "epoch": 0.7698392838403978, "grad_norm": 1.4891513586044312, "learning_rate": 4.606350411540916e-06, "loss": 2.6421064376831054, "step": 95370 }, { "epoch": 0.7699200051661649, "grad_norm": 1.850465178489685, "learning_rate": 4.604734941802703e-06, "loss": 2.4795295715332033, "step": 95380 }, { "epoch": 0.7700007264919319, "grad_norm": 0.8125085830688477, "learning_rate": 4.60311947206449e-06, "loss": 2.440733528137207, "step": 95390 }, { "epoch": 0.7700814478176989, "grad_norm": 0.6850903034210205, "learning_rate": 4.601504002326277e-06, "loss": 2.8739717483520506, "step": 95400 }, { "epoch": 0.770162169143466, "grad_norm": 1.045411467552185, "learning_rate": 4.599888532588064e-06, "loss": 2.551652526855469, "step": 95410 }, { "epoch": 0.7702428904692331, "grad_norm": 0.8460765480995178, "learning_rate": 4.598273062849851e-06, "loss": 2.582837677001953, "step": 95420 }, { "epoch": 0.7703236117950001, "grad_norm": 0.9299761652946472, "learning_rate": 4.5966575931116374e-06, "loss": 2.669253921508789, "step": 95430 }, { "epoch": 0.7704043331207672, "grad_norm": 0.9408683180809021, "learning_rate": 4.595042123373424e-06, "loss": 2.384701728820801, "step": 95440 }, { "epoch": 0.7704850544465343, "grad_norm": 0.8750081658363342, "learning_rate": 4.593426653635211e-06, "loss": 3.041201591491699, "step": 95450 }, { "epoch": 0.7705657757723012, "grad_norm": 0.6054151058197021, "learning_rate": 4.591811183896998e-06, "loss": 2.8273187637329102, "step": 95460 }, { "epoch": 0.7706464970980683, "grad_norm": 1.3478959798812866, "learning_rate": 4.590195714158785e-06, "loss": 2.6906646728515624, "step": 95470 }, { "epoch": 0.7707272184238354, "grad_norm": 0.9496885538101196, "learning_rate": 4.588580244420572e-06, "loss": 3.3619422912597656, "step": 95480 }, { "epoch": 0.7708079397496025, "grad_norm": 1.1241482496261597, "learning_rate": 4.5869647746823585e-06, "loss": 2.5655233383178713, "step": 95490 }, { "epoch": 0.7708886610753695, "grad_norm": 0.9395366311073303, "learning_rate": 4.585349304944145e-06, "loss": 2.8281078338623047, "step": 95500 }, { "epoch": 0.7709693824011365, "grad_norm": 0.9197989702224731, "learning_rate": 4.583733835205932e-06, "loss": 2.2870750427246094, "step": 95510 }, { "epoch": 0.7710501037269036, "grad_norm": 1.5613356828689575, "learning_rate": 4.582118365467719e-06, "loss": 2.4794824600219725, "step": 95520 }, { "epoch": 0.7711308250526706, "grad_norm": 1.275802493095398, "learning_rate": 4.580502895729506e-06, "loss": 2.6911102294921876, "step": 95530 }, { "epoch": 0.7712115463784377, "grad_norm": 1.6295228004455566, "learning_rate": 4.578887425991293e-06, "loss": 2.4407243728637695, "step": 95540 }, { "epoch": 0.7712922677042048, "grad_norm": 0.6857605576515198, "learning_rate": 4.57727195625308e-06, "loss": 2.472108268737793, "step": 95550 }, { "epoch": 0.7713729890299719, "grad_norm": 0.9556310176849365, "learning_rate": 4.5756564865148664e-06, "loss": 2.5815832138061525, "step": 95560 }, { "epoch": 0.7714537103557388, "grad_norm": 0.6598567962646484, "learning_rate": 4.574041016776653e-06, "loss": 2.451815605163574, "step": 95570 }, { "epoch": 0.7715344316815059, "grad_norm": 1.0999410152435303, "learning_rate": 4.57242554703844e-06, "loss": 2.7142576217651366, "step": 95580 }, { "epoch": 0.771615153007273, "grad_norm": 1.9332804679870605, "learning_rate": 4.570810077300227e-06, "loss": 2.713411331176758, "step": 95590 }, { "epoch": 0.7716958743330401, "grad_norm": 0.9552856087684631, "learning_rate": 4.569194607562014e-06, "loss": 2.216093063354492, "step": 95600 }, { "epoch": 0.7717765956588071, "grad_norm": 0.8495000600814819, "learning_rate": 4.567579137823801e-06, "loss": 3.011396026611328, "step": 95610 }, { "epoch": 0.7718573169845742, "grad_norm": 1.708642840385437, "learning_rate": 4.5659636680855875e-06, "loss": 2.8473058700561524, "step": 95620 }, { "epoch": 0.7719380383103412, "grad_norm": 1.3027924299240112, "learning_rate": 4.564348198347374e-06, "loss": 3.1910417556762694, "step": 95630 }, { "epoch": 0.7720187596361082, "grad_norm": 1.8551989793777466, "learning_rate": 4.562732728609161e-06, "loss": 3.0127737045288088, "step": 95640 }, { "epoch": 0.7720994809618753, "grad_norm": 0.7819997668266296, "learning_rate": 4.561117258870949e-06, "loss": 2.6188831329345703, "step": 95650 }, { "epoch": 0.7721802022876424, "grad_norm": 1.1711870431900024, "learning_rate": 4.559501789132736e-06, "loss": 2.896819496154785, "step": 95660 }, { "epoch": 0.7722609236134095, "grad_norm": 1.0560333728790283, "learning_rate": 4.557886319394523e-06, "loss": 2.7596006393432617, "step": 95670 }, { "epoch": 0.7723416449391765, "grad_norm": 1.316633939743042, "learning_rate": 4.5562708496563094e-06, "loss": 2.905552864074707, "step": 95680 }, { "epoch": 0.7724223662649435, "grad_norm": 0.9080268144607544, "learning_rate": 4.554655379918096e-06, "loss": 2.4526853561401367, "step": 95690 }, { "epoch": 0.7725030875907106, "grad_norm": 0.6642899513244629, "learning_rate": 4.553039910179883e-06, "loss": 2.37201042175293, "step": 95700 }, { "epoch": 0.7725838089164776, "grad_norm": 1.2031751871109009, "learning_rate": 4.55142444044167e-06, "loss": 2.3637598037719725, "step": 95710 }, { "epoch": 0.7726645302422447, "grad_norm": 1.4940993785858154, "learning_rate": 4.549808970703457e-06, "loss": 2.928740310668945, "step": 95720 }, { "epoch": 0.7727452515680118, "grad_norm": 0.9990853071212769, "learning_rate": 4.548193500965244e-06, "loss": 2.5757425308227537, "step": 95730 }, { "epoch": 0.7728259728937789, "grad_norm": 0.9217379093170166, "learning_rate": 4.5465780312270305e-06, "loss": 2.1686214447021483, "step": 95740 }, { "epoch": 0.7729066942195458, "grad_norm": 0.81476229429245, "learning_rate": 4.544962561488817e-06, "loss": 3.084633636474609, "step": 95750 }, { "epoch": 0.7729874155453129, "grad_norm": 0.7518223524093628, "learning_rate": 4.543347091750604e-06, "loss": 2.503743362426758, "step": 95760 }, { "epoch": 0.77306813687108, "grad_norm": 1.6502490043640137, "learning_rate": 4.541731622012391e-06, "loss": 3.139069175720215, "step": 95770 }, { "epoch": 0.773148858196847, "grad_norm": 1.2186589241027832, "learning_rate": 4.540116152274178e-06, "loss": 2.8167985916137694, "step": 95780 }, { "epoch": 0.7732295795226141, "grad_norm": 0.819286048412323, "learning_rate": 4.538500682535965e-06, "loss": 2.696236991882324, "step": 95790 }, { "epoch": 0.7733103008483811, "grad_norm": 0.7019106149673462, "learning_rate": 4.5368852127977516e-06, "loss": 2.5843360900878904, "step": 95800 }, { "epoch": 0.7733910221741482, "grad_norm": 0.9119117856025696, "learning_rate": 4.535269743059538e-06, "loss": 2.7493505477905273, "step": 95810 }, { "epoch": 0.7734717434999152, "grad_norm": 0.7608279585838318, "learning_rate": 4.533654273321325e-06, "loss": 2.8864810943603514, "step": 95820 }, { "epoch": 0.7735524648256823, "grad_norm": 1.3139426708221436, "learning_rate": 4.532038803583112e-06, "loss": 2.876559829711914, "step": 95830 }, { "epoch": 0.7736331861514494, "grad_norm": 1.0906040668487549, "learning_rate": 4.530423333844899e-06, "loss": 2.903037261962891, "step": 95840 }, { "epoch": 0.7737139074772164, "grad_norm": 0.979239821434021, "learning_rate": 4.528807864106687e-06, "loss": 3.0461076736450194, "step": 95850 }, { "epoch": 0.7737946288029834, "grad_norm": 1.3383135795593262, "learning_rate": 4.5271923943684735e-06, "loss": 2.9720935821533203, "step": 95860 }, { "epoch": 0.7738753501287505, "grad_norm": 1.1539782285690308, "learning_rate": 4.52557692463026e-06, "loss": 2.6265844345092773, "step": 95870 }, { "epoch": 0.7739560714545176, "grad_norm": 1.1527705192565918, "learning_rate": 4.523961454892047e-06, "loss": 2.6486663818359375, "step": 95880 }, { "epoch": 0.7740367927802846, "grad_norm": 0.8218302726745605, "learning_rate": 4.522345985153834e-06, "loss": 2.42840690612793, "step": 95890 }, { "epoch": 0.7741175141060517, "grad_norm": 0.6737305521965027, "learning_rate": 4.520730515415621e-06, "loss": 2.36911563873291, "step": 95900 }, { "epoch": 0.7741982354318188, "grad_norm": 1.0472177267074585, "learning_rate": 4.519115045677408e-06, "loss": 2.380088233947754, "step": 95910 }, { "epoch": 0.7742789567575857, "grad_norm": 1.3066967725753784, "learning_rate": 4.5174995759391946e-06, "loss": 2.428740119934082, "step": 95920 }, { "epoch": 0.7743596780833528, "grad_norm": 1.0417460203170776, "learning_rate": 4.515884106200981e-06, "loss": 2.3883142471313477, "step": 95930 }, { "epoch": 0.7744403994091199, "grad_norm": 1.024183750152588, "learning_rate": 4.514268636462768e-06, "loss": 2.203024482727051, "step": 95940 }, { "epoch": 0.774521120734887, "grad_norm": 0.6553815603256226, "learning_rate": 4.512653166724555e-06, "loss": 2.3514425277709963, "step": 95950 }, { "epoch": 0.774601842060654, "grad_norm": 1.141632318496704, "learning_rate": 4.511037696986342e-06, "loss": 3.0091028213500977, "step": 95960 }, { "epoch": 0.774682563386421, "grad_norm": 0.5996136665344238, "learning_rate": 4.509422227248129e-06, "loss": 2.3769704818725588, "step": 95970 }, { "epoch": 0.7747632847121881, "grad_norm": 0.7141093015670776, "learning_rate": 4.507806757509916e-06, "loss": 2.238072967529297, "step": 95980 }, { "epoch": 0.7748440060379551, "grad_norm": 0.7344898581504822, "learning_rate": 4.5061912877717025e-06, "loss": 2.9548778533935547, "step": 95990 }, { "epoch": 0.7749247273637222, "grad_norm": 0.8001735806465149, "learning_rate": 4.504575818033489e-06, "loss": 2.421662521362305, "step": 96000 }, { "epoch": 0.7750054486894893, "grad_norm": 1.1999629735946655, "learning_rate": 4.502960348295276e-06, "loss": 2.7252880096435548, "step": 96010 }, { "epoch": 0.7750861700152564, "grad_norm": 1.3127617835998535, "learning_rate": 4.501344878557063e-06, "loss": 3.0301504135131836, "step": 96020 }, { "epoch": 0.7751668913410233, "grad_norm": 0.8222053050994873, "learning_rate": 4.49972940881885e-06, "loss": 2.7668312072753904, "step": 96030 }, { "epoch": 0.7752476126667904, "grad_norm": 0.6498560309410095, "learning_rate": 4.498113939080637e-06, "loss": 2.312378692626953, "step": 96040 }, { "epoch": 0.7753283339925575, "grad_norm": 0.76810222864151, "learning_rate": 4.4964984693424236e-06, "loss": 2.3819402694702148, "step": 96050 }, { "epoch": 0.7754090553183246, "grad_norm": 0.9213168621063232, "learning_rate": 4.49488299960421e-06, "loss": 2.5624153137207033, "step": 96060 }, { "epoch": 0.7754897766440916, "grad_norm": 1.143915057182312, "learning_rate": 4.493267529865997e-06, "loss": 2.503739356994629, "step": 96070 }, { "epoch": 0.7755704979698587, "grad_norm": 0.9774268269538879, "learning_rate": 4.491652060127784e-06, "loss": 2.5840923309326174, "step": 96080 }, { "epoch": 0.7756512192956257, "grad_norm": 0.8763678669929504, "learning_rate": 4.490036590389571e-06, "loss": 2.6882123947143555, "step": 96090 }, { "epoch": 0.7757319406213927, "grad_norm": 1.3835246562957764, "learning_rate": 4.488421120651358e-06, "loss": 2.2112041473388673, "step": 96100 }, { "epoch": 0.7758126619471598, "grad_norm": 0.4935207664966583, "learning_rate": 4.486805650913145e-06, "loss": 2.464248275756836, "step": 96110 }, { "epoch": 0.7758933832729269, "grad_norm": 0.6565567851066589, "learning_rate": 4.4851901811749315e-06, "loss": 2.5106412887573244, "step": 96120 }, { "epoch": 0.775974104598694, "grad_norm": 0.5971043109893799, "learning_rate": 4.483574711436718e-06, "loss": 2.738913726806641, "step": 96130 }, { "epoch": 0.776054825924461, "grad_norm": 1.06141197681427, "learning_rate": 4.481959241698505e-06, "loss": 2.722622871398926, "step": 96140 }, { "epoch": 0.776135547250228, "grad_norm": 1.1274701356887817, "learning_rate": 4.480343771960292e-06, "loss": 2.566044235229492, "step": 96150 }, { "epoch": 0.7762162685759951, "grad_norm": 0.9629140496253967, "learning_rate": 4.478728302222079e-06, "loss": 2.5662837982177735, "step": 96160 }, { "epoch": 0.7762969899017621, "grad_norm": 0.7204901576042175, "learning_rate": 4.477112832483866e-06, "loss": 3.0077342987060547, "step": 96170 }, { "epoch": 0.7763777112275292, "grad_norm": 2.606863021850586, "learning_rate": 4.4754973627456526e-06, "loss": 2.708541679382324, "step": 96180 }, { "epoch": 0.7764584325532963, "grad_norm": 0.9465569853782654, "learning_rate": 4.473881893007439e-06, "loss": 2.2654191970825197, "step": 96190 }, { "epoch": 0.7765391538790634, "grad_norm": 0.890182614326477, "learning_rate": 4.472266423269226e-06, "loss": 2.9344928741455076, "step": 96200 }, { "epoch": 0.7766198752048303, "grad_norm": 0.8720362186431885, "learning_rate": 4.470650953531013e-06, "loss": 2.3541393280029297, "step": 96210 }, { "epoch": 0.7767005965305974, "grad_norm": 0.9272173047065735, "learning_rate": 4.4690354837928e-06, "loss": 2.4692972183227537, "step": 96220 }, { "epoch": 0.7767813178563645, "grad_norm": 1.0262565612792969, "learning_rate": 4.467420014054587e-06, "loss": 3.127978515625, "step": 96230 }, { "epoch": 0.7768620391821315, "grad_norm": 0.7778814435005188, "learning_rate": 4.465804544316374e-06, "loss": 2.236606979370117, "step": 96240 }, { "epoch": 0.7769427605078986, "grad_norm": 1.0127979516983032, "learning_rate": 4.4641890745781605e-06, "loss": 2.4818546295166017, "step": 96250 }, { "epoch": 0.7770234818336657, "grad_norm": 0.9009873270988464, "learning_rate": 4.462573604839947e-06, "loss": 2.8865514755249024, "step": 96260 }, { "epoch": 0.7771042031594327, "grad_norm": 0.9523217678070068, "learning_rate": 4.460958135101734e-06, "loss": 2.2198596954345704, "step": 96270 }, { "epoch": 0.7771849244851997, "grad_norm": 1.3736227750778198, "learning_rate": 4.459342665363521e-06, "loss": 2.462718391418457, "step": 96280 }, { "epoch": 0.7772656458109668, "grad_norm": 1.6475564241409302, "learning_rate": 4.457727195625308e-06, "loss": 2.53124885559082, "step": 96290 }, { "epoch": 0.7773463671367339, "grad_norm": 0.9552711248397827, "learning_rate": 4.456111725887095e-06, "loss": 2.459735298156738, "step": 96300 }, { "epoch": 0.7774270884625009, "grad_norm": 0.7997943162918091, "learning_rate": 4.4544962561488816e-06, "loss": 2.3303985595703125, "step": 96310 }, { "epoch": 0.777507809788268, "grad_norm": 1.265557885169983, "learning_rate": 4.452880786410668e-06, "loss": 2.587264060974121, "step": 96320 }, { "epoch": 0.777588531114035, "grad_norm": 1.1766626834869385, "learning_rate": 4.451265316672455e-06, "loss": 2.1536645889282227, "step": 96330 }, { "epoch": 0.7776692524398021, "grad_norm": 1.0692787170410156, "learning_rate": 4.449649846934242e-06, "loss": 2.9092899322509767, "step": 96340 }, { "epoch": 0.7777499737655691, "grad_norm": 1.1750767230987549, "learning_rate": 4.44803437719603e-06, "loss": 2.5769826889038088, "step": 96350 }, { "epoch": 0.7778306950913362, "grad_norm": 1.2615753412246704, "learning_rate": 4.446418907457817e-06, "loss": 2.7340415954589843, "step": 96360 }, { "epoch": 0.7779114164171033, "grad_norm": 0.8575026392936707, "learning_rate": 4.4448034377196035e-06, "loss": 2.6993213653564454, "step": 96370 }, { "epoch": 0.7779921377428702, "grad_norm": 0.7292365431785583, "learning_rate": 4.44318796798139e-06, "loss": 2.7500497817993166, "step": 96380 }, { "epoch": 0.7780728590686373, "grad_norm": 0.5802809596061707, "learning_rate": 4.441572498243177e-06, "loss": 2.4435565948486326, "step": 96390 }, { "epoch": 0.7781535803944044, "grad_norm": 1.2340161800384521, "learning_rate": 4.439957028504964e-06, "loss": 2.3494930267333984, "step": 96400 }, { "epoch": 0.7782343017201715, "grad_norm": 1.1318838596343994, "learning_rate": 4.438341558766751e-06, "loss": 2.8274520874023437, "step": 96410 }, { "epoch": 0.7783150230459385, "grad_norm": 1.1049925088882446, "learning_rate": 4.436726089028538e-06, "loss": 2.390747833251953, "step": 96420 }, { "epoch": 0.7783957443717056, "grad_norm": 1.1410835981369019, "learning_rate": 4.4351106192903245e-06, "loss": 2.3469797134399415, "step": 96430 }, { "epoch": 0.7784764656974726, "grad_norm": 1.0163216590881348, "learning_rate": 4.433495149552111e-06, "loss": 2.5234907150268553, "step": 96440 }, { "epoch": 0.7785571870232396, "grad_norm": 0.7769381999969482, "learning_rate": 4.431879679813898e-06, "loss": 2.5243310928344727, "step": 96450 }, { "epoch": 0.7786379083490067, "grad_norm": 0.861721396446228, "learning_rate": 4.430264210075685e-06, "loss": 2.4485357284545897, "step": 96460 }, { "epoch": 0.7787186296747738, "grad_norm": 0.857421875, "learning_rate": 4.428648740337472e-06, "loss": 2.934595489501953, "step": 96470 }, { "epoch": 0.7787993510005409, "grad_norm": 0.7128262519836426, "learning_rate": 4.427033270599259e-06, "loss": 2.3446773529052733, "step": 96480 }, { "epoch": 0.7788800723263078, "grad_norm": 0.8799536228179932, "learning_rate": 4.425417800861046e-06, "loss": 2.3697059631347654, "step": 96490 }, { "epoch": 0.7789607936520749, "grad_norm": 0.7258084416389465, "learning_rate": 4.4238023311228325e-06, "loss": 2.504703140258789, "step": 96500 }, { "epoch": 0.779041514977842, "grad_norm": 1.2675976753234863, "learning_rate": 4.422186861384619e-06, "loss": 2.8608285903930666, "step": 96510 }, { "epoch": 0.7791222363036091, "grad_norm": 2.1172361373901367, "learning_rate": 4.420571391646407e-06, "loss": 3.568387985229492, "step": 96520 }, { "epoch": 0.7792029576293761, "grad_norm": 1.0719916820526123, "learning_rate": 4.418955921908194e-06, "loss": 2.3867729187011717, "step": 96530 }, { "epoch": 0.7792836789551432, "grad_norm": 1.131745457649231, "learning_rate": 4.417340452169981e-06, "loss": 2.123050498962402, "step": 96540 }, { "epoch": 0.7793644002809103, "grad_norm": 0.5638901591300964, "learning_rate": 4.4157249824317675e-06, "loss": 2.859671974182129, "step": 96550 }, { "epoch": 0.7794451216066772, "grad_norm": 1.355474591255188, "learning_rate": 4.414109512693554e-06, "loss": 2.444522476196289, "step": 96560 }, { "epoch": 0.7795258429324443, "grad_norm": 1.383887767791748, "learning_rate": 4.412494042955341e-06, "loss": 2.7214433670043947, "step": 96570 }, { "epoch": 0.7796065642582114, "grad_norm": 0.9186393618583679, "learning_rate": 4.410878573217128e-06, "loss": 2.4283212661743163, "step": 96580 }, { "epoch": 0.7796872855839785, "grad_norm": 1.3561753034591675, "learning_rate": 4.409263103478915e-06, "loss": 2.669373321533203, "step": 96590 }, { "epoch": 0.7797680069097455, "grad_norm": 1.0595066547393799, "learning_rate": 4.407647633740702e-06, "loss": 2.82556095123291, "step": 96600 }, { "epoch": 0.7798487282355125, "grad_norm": 1.0887316465377808, "learning_rate": 4.406032164002489e-06, "loss": 2.286487579345703, "step": 96610 }, { "epoch": 0.7799294495612796, "grad_norm": 0.8935179114341736, "learning_rate": 4.4044166942642755e-06, "loss": 2.659572410583496, "step": 96620 }, { "epoch": 0.7800101708870466, "grad_norm": 0.8496255874633789, "learning_rate": 4.402801224526062e-06, "loss": 2.7623321533203127, "step": 96630 }, { "epoch": 0.7800908922128137, "grad_norm": 0.8650956153869629, "learning_rate": 4.401185754787849e-06, "loss": 2.9702886581420898, "step": 96640 }, { "epoch": 0.7801716135385808, "grad_norm": 0.8442884087562561, "learning_rate": 4.399570285049636e-06, "loss": 2.279570388793945, "step": 96650 }, { "epoch": 0.7802523348643479, "grad_norm": 3.3803303241729736, "learning_rate": 4.397954815311423e-06, "loss": 2.9612548828125, "step": 96660 }, { "epoch": 0.7803330561901148, "grad_norm": 1.063275694847107, "learning_rate": 4.39633934557321e-06, "loss": 2.452015686035156, "step": 96670 }, { "epoch": 0.7804137775158819, "grad_norm": 0.6473342776298523, "learning_rate": 4.3947238758349965e-06, "loss": 2.6030492782592773, "step": 96680 }, { "epoch": 0.780494498841649, "grad_norm": 0.5879281759262085, "learning_rate": 4.393108406096783e-06, "loss": 2.2496856689453124, "step": 96690 }, { "epoch": 0.780575220167416, "grad_norm": 0.6147021651268005, "learning_rate": 4.39149293635857e-06, "loss": 2.027410697937012, "step": 96700 }, { "epoch": 0.7806559414931831, "grad_norm": 0.8631032109260559, "learning_rate": 4.389877466620357e-06, "loss": 2.3912858963012695, "step": 96710 }, { "epoch": 0.7807366628189502, "grad_norm": 0.9465065002441406, "learning_rate": 4.388261996882144e-06, "loss": 2.2792547225952147, "step": 96720 }, { "epoch": 0.7808173841447172, "grad_norm": 0.8541322350502014, "learning_rate": 4.386646527143931e-06, "loss": 2.7224327087402345, "step": 96730 }, { "epoch": 0.7808981054704842, "grad_norm": 0.5872308611869812, "learning_rate": 4.385031057405718e-06, "loss": 2.4695274353027346, "step": 96740 }, { "epoch": 0.7809788267962513, "grad_norm": 1.3868035078048706, "learning_rate": 4.3834155876675045e-06, "loss": 2.80330810546875, "step": 96750 }, { "epoch": 0.7810595481220184, "grad_norm": 0.7263317108154297, "learning_rate": 4.381800117929291e-06, "loss": 2.3213054656982424, "step": 96760 }, { "epoch": 0.7811402694477854, "grad_norm": 0.8215973973274231, "learning_rate": 4.380184648191078e-06, "loss": 2.1274696350097657, "step": 96770 }, { "epoch": 0.7812209907735524, "grad_norm": 0.711549699306488, "learning_rate": 4.378569178452865e-06, "loss": 2.5631486892700197, "step": 96780 }, { "epoch": 0.7813017120993195, "grad_norm": 1.4047282934188843, "learning_rate": 4.376953708714652e-06, "loss": 2.169874382019043, "step": 96790 }, { "epoch": 0.7813824334250866, "grad_norm": 1.4977723360061646, "learning_rate": 4.375338238976439e-06, "loss": 2.128643608093262, "step": 96800 }, { "epoch": 0.7814631547508536, "grad_norm": 0.6530953049659729, "learning_rate": 4.3737227692382255e-06, "loss": 2.732155418395996, "step": 96810 }, { "epoch": 0.7815438760766207, "grad_norm": 0.7627478837966919, "learning_rate": 4.372107299500012e-06, "loss": 1.902063751220703, "step": 96820 }, { "epoch": 0.7816245974023878, "grad_norm": 0.5566505193710327, "learning_rate": 4.370491829761799e-06, "loss": 2.8212726593017576, "step": 96830 }, { "epoch": 0.7817053187281547, "grad_norm": 0.9828370213508606, "learning_rate": 4.368876360023586e-06, "loss": 2.4641204833984376, "step": 96840 }, { "epoch": 0.7817860400539218, "grad_norm": 0.7371079325675964, "learning_rate": 4.367260890285373e-06, "loss": 2.575886535644531, "step": 96850 }, { "epoch": 0.7818667613796889, "grad_norm": 1.1623258590698242, "learning_rate": 4.36564542054716e-06, "loss": 2.8702503204345704, "step": 96860 }, { "epoch": 0.781947482705456, "grad_norm": 0.8956352472305298, "learning_rate": 4.364029950808947e-06, "loss": 2.514784049987793, "step": 96870 }, { "epoch": 0.782028204031223, "grad_norm": 2.4106528759002686, "learning_rate": 4.3624144810707335e-06, "loss": 2.3330787658691405, "step": 96880 }, { "epoch": 0.7821089253569901, "grad_norm": 0.8560632467269897, "learning_rate": 4.36079901133252e-06, "loss": 3.136599540710449, "step": 96890 }, { "epoch": 0.7821896466827571, "grad_norm": 1.516654372215271, "learning_rate": 4.359183541594307e-06, "loss": 2.5816564559936523, "step": 96900 }, { "epoch": 0.7822703680085241, "grad_norm": 0.5205486416816711, "learning_rate": 4.357568071856094e-06, "loss": 2.428358459472656, "step": 96910 }, { "epoch": 0.7823510893342912, "grad_norm": 1.2195899486541748, "learning_rate": 4.355952602117881e-06, "loss": 2.4799306869506834, "step": 96920 }, { "epoch": 0.7824318106600583, "grad_norm": 0.819202721118927, "learning_rate": 4.354337132379668e-06, "loss": 2.4083093643188476, "step": 96930 }, { "epoch": 0.7825125319858254, "grad_norm": 1.564011573791504, "learning_rate": 4.3527216626414545e-06, "loss": 3.513880157470703, "step": 96940 }, { "epoch": 0.7825932533115924, "grad_norm": 1.429398775100708, "learning_rate": 4.351106192903241e-06, "loss": 2.5870967864990235, "step": 96950 }, { "epoch": 0.7826739746373594, "grad_norm": 0.9257598519325256, "learning_rate": 4.349490723165028e-06, "loss": 2.3571523666381835, "step": 96960 }, { "epoch": 0.7827546959631265, "grad_norm": 0.6877382397651672, "learning_rate": 4.347875253426815e-06, "loss": 2.3953880310058593, "step": 96970 }, { "epoch": 0.7828354172888935, "grad_norm": 1.1547935009002686, "learning_rate": 4.346259783688602e-06, "loss": 2.480278205871582, "step": 96980 }, { "epoch": 0.7829161386146606, "grad_norm": 0.8608381152153015, "learning_rate": 4.344644313950389e-06, "loss": 2.2970869064331056, "step": 96990 }, { "epoch": 0.7829968599404277, "grad_norm": 0.7968284487724304, "learning_rate": 4.343028844212176e-06, "loss": 2.2691217422485352, "step": 97000 }, { "epoch": 0.7830775812661948, "grad_norm": 0.9009374380111694, "learning_rate": 4.3414133744739624e-06, "loss": 2.4161590576171874, "step": 97010 }, { "epoch": 0.7831583025919617, "grad_norm": 0.9906494617462158, "learning_rate": 4.339797904735749e-06, "loss": 2.5868906021118163, "step": 97020 }, { "epoch": 0.7832390239177288, "grad_norm": 1.23141348361969, "learning_rate": 4.338182434997536e-06, "loss": 2.6062692642211913, "step": 97030 }, { "epoch": 0.7833197452434959, "grad_norm": 0.7734613418579102, "learning_rate": 4.336566965259324e-06, "loss": 2.4595464706420898, "step": 97040 }, { "epoch": 0.783400466569263, "grad_norm": 0.8215625286102295, "learning_rate": 4.334951495521111e-06, "loss": 2.545114517211914, "step": 97050 }, { "epoch": 0.78348118789503, "grad_norm": 0.785499632358551, "learning_rate": 4.3333360257828975e-06, "loss": 2.282851219177246, "step": 97060 }, { "epoch": 0.783561909220797, "grad_norm": 0.6898525953292847, "learning_rate": 4.331720556044684e-06, "loss": 2.1273508071899414, "step": 97070 }, { "epoch": 0.7836426305465641, "grad_norm": 0.8621604442596436, "learning_rate": 4.330105086306471e-06, "loss": 2.344590187072754, "step": 97080 }, { "epoch": 0.7837233518723311, "grad_norm": 1.029282808303833, "learning_rate": 4.328489616568258e-06, "loss": 2.9334646224975587, "step": 97090 }, { "epoch": 0.7838040731980982, "grad_norm": 0.8559943437576294, "learning_rate": 4.326874146830045e-06, "loss": 2.3482692718505858, "step": 97100 }, { "epoch": 0.7838847945238653, "grad_norm": 0.6817748546600342, "learning_rate": 4.325258677091832e-06, "loss": 2.0589773178100588, "step": 97110 }, { "epoch": 0.7839655158496324, "grad_norm": 1.0854369401931763, "learning_rate": 4.323643207353619e-06, "loss": 2.5952133178710937, "step": 97120 }, { "epoch": 0.7840462371753993, "grad_norm": 0.5255721807479858, "learning_rate": 4.3220277376154054e-06, "loss": 2.6531402587890627, "step": 97130 }, { "epoch": 0.7841269585011664, "grad_norm": 1.7776439189910889, "learning_rate": 4.320412267877192e-06, "loss": 2.6538957595825194, "step": 97140 }, { "epoch": 0.7842076798269335, "grad_norm": 1.1853054761886597, "learning_rate": 4.318796798138979e-06, "loss": 2.6039974212646486, "step": 97150 }, { "epoch": 0.7842884011527005, "grad_norm": 1.8812557458877563, "learning_rate": 4.317181328400766e-06, "loss": 2.7872570037841795, "step": 97160 }, { "epoch": 0.7843691224784676, "grad_norm": 0.655421257019043, "learning_rate": 4.315565858662553e-06, "loss": 2.2498615264892576, "step": 97170 }, { "epoch": 0.7844498438042347, "grad_norm": 0.9402918219566345, "learning_rate": 4.31395038892434e-06, "loss": 2.4321100234985353, "step": 97180 }, { "epoch": 0.7845305651300017, "grad_norm": 0.7129901647567749, "learning_rate": 4.3123349191861265e-06, "loss": 2.3143165588378904, "step": 97190 }, { "epoch": 0.7846112864557687, "grad_norm": 2.2722182273864746, "learning_rate": 4.310719449447913e-06, "loss": 2.7359962463378906, "step": 97200 }, { "epoch": 0.7846920077815358, "grad_norm": 1.0678918361663818, "learning_rate": 4.3091039797097e-06, "loss": 2.3275426864624023, "step": 97210 }, { "epoch": 0.7847727291073029, "grad_norm": 1.1711903810501099, "learning_rate": 4.307488509971488e-06, "loss": 2.0650705337524413, "step": 97220 }, { "epoch": 0.7848534504330699, "grad_norm": 0.8827998638153076, "learning_rate": 4.305873040233275e-06, "loss": 2.4312557220458983, "step": 97230 }, { "epoch": 0.784934171758837, "grad_norm": 1.5125658512115479, "learning_rate": 4.304257570495062e-06, "loss": 2.5567298889160157, "step": 97240 }, { "epoch": 0.785014893084604, "grad_norm": 0.7615203261375427, "learning_rate": 4.3026421007568484e-06, "loss": 2.3625001907348633, "step": 97250 }, { "epoch": 0.7850956144103711, "grad_norm": 0.8829296827316284, "learning_rate": 4.301026631018635e-06, "loss": 2.695348930358887, "step": 97260 }, { "epoch": 0.7851763357361381, "grad_norm": 0.9175857901573181, "learning_rate": 4.299411161280422e-06, "loss": 2.5206554412841795, "step": 97270 }, { "epoch": 0.7852570570619052, "grad_norm": 1.1022005081176758, "learning_rate": 4.297795691542209e-06, "loss": 2.937801170349121, "step": 97280 }, { "epoch": 0.7853377783876723, "grad_norm": 0.9003457427024841, "learning_rate": 4.296180221803996e-06, "loss": 2.8413631439208986, "step": 97290 }, { "epoch": 0.7854184997134392, "grad_norm": 2.0068483352661133, "learning_rate": 4.294564752065783e-06, "loss": 2.5479763031005858, "step": 97300 }, { "epoch": 0.7854992210392063, "grad_norm": 0.5870174169540405, "learning_rate": 4.2929492823275695e-06, "loss": 2.7602399826049804, "step": 97310 }, { "epoch": 0.7855799423649734, "grad_norm": 0.8665106892585754, "learning_rate": 4.291333812589356e-06, "loss": 2.3711748123168945, "step": 97320 }, { "epoch": 0.7856606636907405, "grad_norm": 0.5861380696296692, "learning_rate": 4.289718342851143e-06, "loss": 2.5797212600708006, "step": 97330 }, { "epoch": 0.7857413850165075, "grad_norm": 1.3904529809951782, "learning_rate": 4.28810287311293e-06, "loss": 2.1421504974365235, "step": 97340 }, { "epoch": 0.7858221063422746, "grad_norm": 1.079172968864441, "learning_rate": 4.286487403374717e-06, "loss": 2.2523740768432616, "step": 97350 }, { "epoch": 0.7859028276680416, "grad_norm": 1.654581069946289, "learning_rate": 4.284871933636504e-06, "loss": 2.539455223083496, "step": 97360 }, { "epoch": 0.7859835489938086, "grad_norm": 2.034661054611206, "learning_rate": 4.283256463898291e-06, "loss": 2.4597599029541017, "step": 97370 }, { "epoch": 0.7860642703195757, "grad_norm": 0.7042801976203918, "learning_rate": 4.2816409941600774e-06, "loss": 2.2010915756225584, "step": 97380 }, { "epoch": 0.7861449916453428, "grad_norm": 1.0514682531356812, "learning_rate": 4.280025524421864e-06, "loss": 3.1081844329833985, "step": 97390 }, { "epoch": 0.7862257129711099, "grad_norm": 1.2307647466659546, "learning_rate": 4.278410054683651e-06, "loss": 2.46146183013916, "step": 97400 }, { "epoch": 0.7863064342968769, "grad_norm": 0.6349478363990784, "learning_rate": 4.276794584945438e-06, "loss": 3.2277881622314455, "step": 97410 }, { "epoch": 0.7863871556226439, "grad_norm": 1.0268079042434692, "learning_rate": 4.275179115207225e-06, "loss": 2.312775802612305, "step": 97420 }, { "epoch": 0.786467876948411, "grad_norm": 1.5542645454406738, "learning_rate": 4.273563645469012e-06, "loss": 2.278766059875488, "step": 97430 }, { "epoch": 0.786548598274178, "grad_norm": 0.6968475580215454, "learning_rate": 4.2719481757307985e-06, "loss": 2.4171539306640626, "step": 97440 }, { "epoch": 0.7866293195999451, "grad_norm": 0.5449364185333252, "learning_rate": 4.270332705992585e-06, "loss": 2.3692779541015625, "step": 97450 }, { "epoch": 0.7867100409257122, "grad_norm": 1.0668600797653198, "learning_rate": 4.268717236254372e-06, "loss": 2.509027862548828, "step": 97460 }, { "epoch": 0.7867907622514793, "grad_norm": 1.1049778461456299, "learning_rate": 4.267101766516159e-06, "loss": 2.350748062133789, "step": 97470 }, { "epoch": 0.7868714835772462, "grad_norm": 1.4738801717758179, "learning_rate": 4.265486296777946e-06, "loss": 2.211309242248535, "step": 97480 }, { "epoch": 0.7869522049030133, "grad_norm": 0.6448058485984802, "learning_rate": 4.263870827039733e-06, "loss": 2.322617530822754, "step": 97490 }, { "epoch": 0.7870329262287804, "grad_norm": 0.6095262169837952, "learning_rate": 4.26225535730152e-06, "loss": 2.592504692077637, "step": 97500 }, { "epoch": 0.7871136475545475, "grad_norm": 1.1462087631225586, "learning_rate": 4.2606398875633064e-06, "loss": 2.4696605682373045, "step": 97510 }, { "epoch": 0.7871943688803145, "grad_norm": 1.0731618404388428, "learning_rate": 4.259024417825093e-06, "loss": 2.502785301208496, "step": 97520 }, { "epoch": 0.7872750902060816, "grad_norm": 1.224661111831665, "learning_rate": 4.25740894808688e-06, "loss": 3.4229354858398438, "step": 97530 }, { "epoch": 0.7873558115318486, "grad_norm": 0.9846607446670532, "learning_rate": 4.255793478348667e-06, "loss": 2.2662635803222657, "step": 97540 }, { "epoch": 0.7874365328576156, "grad_norm": 0.9288412928581238, "learning_rate": 4.254178008610454e-06, "loss": 3.108357238769531, "step": 97550 }, { "epoch": 0.7875172541833827, "grad_norm": 1.5694900751113892, "learning_rate": 4.252562538872241e-06, "loss": 2.199489212036133, "step": 97560 }, { "epoch": 0.7875979755091498, "grad_norm": 0.6032038927078247, "learning_rate": 4.2509470691340275e-06, "loss": 2.6950780868530275, "step": 97570 }, { "epoch": 0.7876786968349169, "grad_norm": 0.637436032295227, "learning_rate": 4.249331599395814e-06, "loss": 2.909076118469238, "step": 97580 }, { "epoch": 0.7877594181606838, "grad_norm": 1.305963397026062, "learning_rate": 4.247716129657601e-06, "loss": 2.2892435073852537, "step": 97590 }, { "epoch": 0.7878401394864509, "grad_norm": 0.9894351959228516, "learning_rate": 4.246100659919388e-06, "loss": 2.602969169616699, "step": 97600 }, { "epoch": 0.787920860812218, "grad_norm": 0.6129400134086609, "learning_rate": 4.244485190181175e-06, "loss": 2.4548206329345703, "step": 97610 }, { "epoch": 0.788001582137985, "grad_norm": 0.885121762752533, "learning_rate": 4.242869720442962e-06, "loss": 2.9115591049194336, "step": 97620 }, { "epoch": 0.7880823034637521, "grad_norm": 1.0001754760742188, "learning_rate": 4.2412542507047486e-06, "loss": 3.3192848205566405, "step": 97630 }, { "epoch": 0.7881630247895192, "grad_norm": 1.2183387279510498, "learning_rate": 4.239638780966535e-06, "loss": 2.9137325286865234, "step": 97640 }, { "epoch": 0.7882437461152862, "grad_norm": 0.9276471734046936, "learning_rate": 4.238023311228322e-06, "loss": 2.3165374755859376, "step": 97650 }, { "epoch": 0.7883244674410532, "grad_norm": 0.8471028804779053, "learning_rate": 4.236407841490109e-06, "loss": 2.635119819641113, "step": 97660 }, { "epoch": 0.7884051887668203, "grad_norm": 1.6338365077972412, "learning_rate": 4.234792371751896e-06, "loss": 2.477311134338379, "step": 97670 }, { "epoch": 0.7884859100925874, "grad_norm": 1.0709670782089233, "learning_rate": 4.233176902013683e-06, "loss": 2.221065330505371, "step": 97680 }, { "epoch": 0.7885666314183544, "grad_norm": 1.5833274126052856, "learning_rate": 4.23156143227547e-06, "loss": 3.1075172424316406, "step": 97690 }, { "epoch": 0.7886473527441215, "grad_norm": 0.7869195342063904, "learning_rate": 4.2299459625372565e-06, "loss": 2.402943420410156, "step": 97700 }, { "epoch": 0.7887280740698885, "grad_norm": 1.15751051902771, "learning_rate": 4.228330492799043e-06, "loss": 2.703975868225098, "step": 97710 }, { "epoch": 0.7888087953956556, "grad_norm": 0.7629861831665039, "learning_rate": 4.22671502306083e-06, "loss": 2.4235889434814455, "step": 97720 }, { "epoch": 0.7888895167214226, "grad_norm": 1.1545618772506714, "learning_rate": 4.225099553322617e-06, "loss": 2.382794952392578, "step": 97730 }, { "epoch": 0.7889702380471897, "grad_norm": 0.9090781211853027, "learning_rate": 4.223484083584405e-06, "loss": 2.625834274291992, "step": 97740 }, { "epoch": 0.7890509593729568, "grad_norm": 0.8137601613998413, "learning_rate": 4.2218686138461916e-06, "loss": 2.9383935928344727, "step": 97750 }, { "epoch": 0.7891316806987237, "grad_norm": 1.1049182415008545, "learning_rate": 4.220253144107978e-06, "loss": 2.4404157638549804, "step": 97760 }, { "epoch": 0.7892124020244908, "grad_norm": 1.6968222856521606, "learning_rate": 4.218637674369765e-06, "loss": 2.7395694732666014, "step": 97770 }, { "epoch": 0.7892931233502579, "grad_norm": 1.7518041133880615, "learning_rate": 4.217022204631552e-06, "loss": 2.4895452499389648, "step": 97780 }, { "epoch": 0.789373844676025, "grad_norm": 1.338975191116333, "learning_rate": 4.215406734893339e-06, "loss": 2.7003963470458983, "step": 97790 }, { "epoch": 0.789454566001792, "grad_norm": 1.0226277112960815, "learning_rate": 4.213791265155126e-06, "loss": 2.3879421234130858, "step": 97800 }, { "epoch": 0.7895352873275591, "grad_norm": 1.0969821214675903, "learning_rate": 4.212175795416913e-06, "loss": 2.387456703186035, "step": 97810 }, { "epoch": 0.7896160086533262, "grad_norm": 1.2851489782333374, "learning_rate": 4.2105603256786995e-06, "loss": 2.2743452072143553, "step": 97820 }, { "epoch": 0.7896967299790931, "grad_norm": 1.4043794870376587, "learning_rate": 4.208944855940486e-06, "loss": 2.559867095947266, "step": 97830 }, { "epoch": 0.7897774513048602, "grad_norm": 0.8078262209892273, "learning_rate": 4.207329386202273e-06, "loss": 2.858982467651367, "step": 97840 }, { "epoch": 0.7898581726306273, "grad_norm": 1.0952783823013306, "learning_rate": 4.20571391646406e-06, "loss": 2.702313232421875, "step": 97850 }, { "epoch": 0.7899388939563944, "grad_norm": 0.997025191783905, "learning_rate": 4.204098446725847e-06, "loss": 2.471512222290039, "step": 97860 }, { "epoch": 0.7900196152821614, "grad_norm": 1.101311206817627, "learning_rate": 4.202482976987634e-06, "loss": 2.4782407760620115, "step": 97870 }, { "epoch": 0.7901003366079284, "grad_norm": 1.4342647790908813, "learning_rate": 4.2008675072494206e-06, "loss": 2.9851436614990234, "step": 97880 }, { "epoch": 0.7901810579336955, "grad_norm": 1.3094313144683838, "learning_rate": 4.199252037511207e-06, "loss": 2.937954902648926, "step": 97890 }, { "epoch": 0.7902617792594625, "grad_norm": 1.074018955230713, "learning_rate": 4.197636567772994e-06, "loss": 2.6788034439086914, "step": 97900 }, { "epoch": 0.7903425005852296, "grad_norm": 1.0432928800582886, "learning_rate": 4.196021098034782e-06, "loss": 3.386795425415039, "step": 97910 }, { "epoch": 0.7904232219109967, "grad_norm": 1.1405797004699707, "learning_rate": 4.194405628296569e-06, "loss": 2.3348594665527345, "step": 97920 }, { "epoch": 0.7905039432367638, "grad_norm": 0.972793698310852, "learning_rate": 4.192790158558356e-06, "loss": 2.867385673522949, "step": 97930 }, { "epoch": 0.7905846645625307, "grad_norm": 0.5729366540908813, "learning_rate": 4.1911746888201425e-06, "loss": 2.384535026550293, "step": 97940 }, { "epoch": 0.7906653858882978, "grad_norm": 1.2746498584747314, "learning_rate": 4.189559219081929e-06, "loss": 2.3259559631347657, "step": 97950 }, { "epoch": 0.7907461072140649, "grad_norm": 1.4430099725723267, "learning_rate": 4.187943749343716e-06, "loss": 2.7126384735107423, "step": 97960 }, { "epoch": 0.7908268285398319, "grad_norm": 1.1315768957138062, "learning_rate": 4.186328279605503e-06, "loss": 2.063610649108887, "step": 97970 }, { "epoch": 0.790907549865599, "grad_norm": 1.2419776916503906, "learning_rate": 4.18471280986729e-06, "loss": 2.702362060546875, "step": 97980 }, { "epoch": 0.790988271191366, "grad_norm": 0.9797328114509583, "learning_rate": 4.183097340129077e-06, "loss": 2.2625097274780273, "step": 97990 }, { "epoch": 0.7910689925171331, "grad_norm": 0.7822915315628052, "learning_rate": 4.1814818703908636e-06, "loss": 2.11169376373291, "step": 98000 }, { "epoch": 0.7911497138429001, "grad_norm": 0.7803675532341003, "learning_rate": 4.17986640065265e-06, "loss": 2.227629280090332, "step": 98010 }, { "epoch": 0.7912304351686672, "grad_norm": 0.6393792033195496, "learning_rate": 4.178250930914437e-06, "loss": 2.3420812606811525, "step": 98020 }, { "epoch": 0.7913111564944343, "grad_norm": 1.0788590908050537, "learning_rate": 4.176635461176224e-06, "loss": 2.615806770324707, "step": 98030 }, { "epoch": 0.7913918778202014, "grad_norm": 1.2253291606903076, "learning_rate": 4.175019991438011e-06, "loss": 2.517966461181641, "step": 98040 }, { "epoch": 0.7914725991459683, "grad_norm": 1.1857397556304932, "learning_rate": 4.173404521699798e-06, "loss": 3.165857696533203, "step": 98050 }, { "epoch": 0.7915533204717354, "grad_norm": 1.7670472860336304, "learning_rate": 4.171789051961585e-06, "loss": 3.0621898651123045, "step": 98060 }, { "epoch": 0.7916340417975025, "grad_norm": 1.5783365964889526, "learning_rate": 4.1701735822233715e-06, "loss": 2.4447301864624023, "step": 98070 }, { "epoch": 0.7917147631232695, "grad_norm": 1.1923493146896362, "learning_rate": 4.168558112485158e-06, "loss": 2.415293884277344, "step": 98080 }, { "epoch": 0.7917954844490366, "grad_norm": 0.9325276613235474, "learning_rate": 4.166942642746945e-06, "loss": 2.4444660186767577, "step": 98090 }, { "epoch": 0.7918762057748037, "grad_norm": 0.7870177626609802, "learning_rate": 4.165327173008732e-06, "loss": 3.1734729766845704, "step": 98100 }, { "epoch": 0.7919569271005708, "grad_norm": 0.6888657212257385, "learning_rate": 4.163711703270519e-06, "loss": 2.2525659561157227, "step": 98110 }, { "epoch": 0.7920376484263377, "grad_norm": 0.729041337966919, "learning_rate": 4.162096233532306e-06, "loss": 2.2165117263793945, "step": 98120 }, { "epoch": 0.7921183697521048, "grad_norm": 1.0867712497711182, "learning_rate": 4.1604807637940926e-06, "loss": 2.0529094696044923, "step": 98130 }, { "epoch": 0.7921990910778719, "grad_norm": 1.6486847400665283, "learning_rate": 4.158865294055879e-06, "loss": 2.5148624420166015, "step": 98140 }, { "epoch": 0.7922798124036389, "grad_norm": 0.7641394734382629, "learning_rate": 4.157249824317666e-06, "loss": 2.4301822662353514, "step": 98150 }, { "epoch": 0.792360533729406, "grad_norm": 1.0964815616607666, "learning_rate": 4.155634354579453e-06, "loss": 2.96451358795166, "step": 98160 }, { "epoch": 0.792441255055173, "grad_norm": 2.020318031311035, "learning_rate": 4.15401888484124e-06, "loss": 2.1476263046264648, "step": 98170 }, { "epoch": 0.7925219763809401, "grad_norm": 1.427563190460205, "learning_rate": 4.152403415103027e-06, "loss": 2.538422203063965, "step": 98180 }, { "epoch": 0.7926026977067071, "grad_norm": 1.1679604053497314, "learning_rate": 4.150787945364814e-06, "loss": 2.486092185974121, "step": 98190 }, { "epoch": 0.7926834190324742, "grad_norm": 0.995124101638794, "learning_rate": 4.1491724756266005e-06, "loss": 2.4577444076538084, "step": 98200 }, { "epoch": 0.7927641403582413, "grad_norm": 1.2903015613555908, "learning_rate": 4.147557005888387e-06, "loss": 2.497763442993164, "step": 98210 }, { "epoch": 0.7928448616840083, "grad_norm": 1.6547720432281494, "learning_rate": 4.145941536150174e-06, "loss": 2.7672683715820314, "step": 98220 }, { "epoch": 0.7929255830097753, "grad_norm": 1.4780226945877075, "learning_rate": 4.144326066411961e-06, "loss": 2.4405088424682617, "step": 98230 }, { "epoch": 0.7930063043355424, "grad_norm": 0.7591120600700378, "learning_rate": 4.142710596673748e-06, "loss": 2.6576919555664062, "step": 98240 }, { "epoch": 0.7930870256613095, "grad_norm": 0.9081717729568481, "learning_rate": 4.141095126935535e-06, "loss": 2.4882785797119142, "step": 98250 }, { "epoch": 0.7931677469870765, "grad_norm": 1.4046573638916016, "learning_rate": 4.1394796571973215e-06, "loss": 2.961174964904785, "step": 98260 }, { "epoch": 0.7932484683128436, "grad_norm": 1.0489861965179443, "learning_rate": 4.137864187459108e-06, "loss": 2.743670845031738, "step": 98270 }, { "epoch": 0.7933291896386107, "grad_norm": 1.000582218170166, "learning_rate": 4.136248717720895e-06, "loss": 2.023049163818359, "step": 98280 }, { "epoch": 0.7934099109643776, "grad_norm": 0.8587853312492371, "learning_rate": 4.134633247982682e-06, "loss": 2.442936325073242, "step": 98290 }, { "epoch": 0.7934906322901447, "grad_norm": 0.5661330223083496, "learning_rate": 4.133017778244469e-06, "loss": 2.5819597244262695, "step": 98300 }, { "epoch": 0.7935713536159118, "grad_norm": 1.0278522968292236, "learning_rate": 4.131402308506256e-06, "loss": 2.3683210372924806, "step": 98310 }, { "epoch": 0.7936520749416789, "grad_norm": 0.7010027766227722, "learning_rate": 4.129786838768043e-06, "loss": 2.5076566696166993, "step": 98320 }, { "epoch": 0.7937327962674459, "grad_norm": 0.8934730291366577, "learning_rate": 4.1281713690298295e-06, "loss": 2.4981416702270507, "step": 98330 }, { "epoch": 0.793813517593213, "grad_norm": 1.6585725545883179, "learning_rate": 4.126555899291616e-06, "loss": 2.770690155029297, "step": 98340 }, { "epoch": 0.79389423891898, "grad_norm": 0.75812828540802, "learning_rate": 4.124940429553403e-06, "loss": 2.350091743469238, "step": 98350 }, { "epoch": 0.793974960244747, "grad_norm": 0.7902414202690125, "learning_rate": 4.123324959815191e-06, "loss": 2.343646430969238, "step": 98360 }, { "epoch": 0.7940556815705141, "grad_norm": 1.120076060295105, "learning_rate": 4.121709490076978e-06, "loss": 2.568364906311035, "step": 98370 }, { "epoch": 0.7941364028962812, "grad_norm": 1.2369400262832642, "learning_rate": 4.1200940203387645e-06, "loss": 2.6188405990600585, "step": 98380 }, { "epoch": 0.7942171242220483, "grad_norm": 0.6153662800788879, "learning_rate": 4.118478550600551e-06, "loss": 2.5726511001586916, "step": 98390 }, { "epoch": 0.7942978455478152, "grad_norm": 1.0335084199905396, "learning_rate": 4.116863080862338e-06, "loss": 2.6619245529174806, "step": 98400 }, { "epoch": 0.7943785668735823, "grad_norm": 1.280508279800415, "learning_rate": 4.115247611124125e-06, "loss": 2.415676498413086, "step": 98410 }, { "epoch": 0.7944592881993494, "grad_norm": 0.8593847751617432, "learning_rate": 4.113632141385912e-06, "loss": 2.2312833786010744, "step": 98420 }, { "epoch": 0.7945400095251164, "grad_norm": 0.9901407361030579, "learning_rate": 4.112016671647699e-06, "loss": 2.569256591796875, "step": 98430 }, { "epoch": 0.7946207308508835, "grad_norm": 0.7853179574012756, "learning_rate": 4.110401201909486e-06, "loss": 2.325048828125, "step": 98440 }, { "epoch": 0.7947014521766506, "grad_norm": 1.5220637321472168, "learning_rate": 4.1087857321712725e-06, "loss": 2.6587778091430665, "step": 98450 }, { "epoch": 0.7947821735024176, "grad_norm": 0.7499403357505798, "learning_rate": 4.107170262433059e-06, "loss": 2.3191200256347657, "step": 98460 }, { "epoch": 0.7948628948281846, "grad_norm": 0.9095928072929382, "learning_rate": 4.105554792694846e-06, "loss": 2.185860252380371, "step": 98470 }, { "epoch": 0.7949436161539517, "grad_norm": 0.7460747957229614, "learning_rate": 4.103939322956633e-06, "loss": 2.443307304382324, "step": 98480 }, { "epoch": 0.7950243374797188, "grad_norm": 1.0512816905975342, "learning_rate": 4.10232385321842e-06, "loss": 2.4107376098632813, "step": 98490 }, { "epoch": 0.7951050588054859, "grad_norm": 0.6619044542312622, "learning_rate": 4.100708383480207e-06, "loss": 2.085881805419922, "step": 98500 }, { "epoch": 0.7951857801312529, "grad_norm": 0.8903518319129944, "learning_rate": 4.0990929137419935e-06, "loss": 2.8153438568115234, "step": 98510 }, { "epoch": 0.7952665014570199, "grad_norm": 0.7790343165397644, "learning_rate": 4.09747744400378e-06, "loss": 2.1513166427612305, "step": 98520 }, { "epoch": 0.795347222782787, "grad_norm": 0.8363877534866333, "learning_rate": 4.095861974265567e-06, "loss": 2.540335464477539, "step": 98530 }, { "epoch": 0.795427944108554, "grad_norm": 0.9348815679550171, "learning_rate": 4.094246504527354e-06, "loss": 2.2445959091186523, "step": 98540 }, { "epoch": 0.7955086654343211, "grad_norm": 0.5376124978065491, "learning_rate": 4.092631034789141e-06, "loss": 2.0611291885375977, "step": 98550 }, { "epoch": 0.7955893867600882, "grad_norm": 0.722446084022522, "learning_rate": 4.091015565050928e-06, "loss": 2.647152137756348, "step": 98560 }, { "epoch": 0.7956701080858553, "grad_norm": 0.9065859317779541, "learning_rate": 4.089400095312715e-06, "loss": 2.147639274597168, "step": 98570 }, { "epoch": 0.7957508294116222, "grad_norm": 0.8505083322525024, "learning_rate": 4.0877846255745015e-06, "loss": 2.356765937805176, "step": 98580 }, { "epoch": 0.7958315507373893, "grad_norm": 1.8644959926605225, "learning_rate": 4.086169155836288e-06, "loss": 2.616865539550781, "step": 98590 }, { "epoch": 0.7959122720631564, "grad_norm": 1.0058057308197021, "learning_rate": 4.084553686098075e-06, "loss": 2.223248100280762, "step": 98600 }, { "epoch": 0.7959929933889234, "grad_norm": 1.431555986404419, "learning_rate": 4.082938216359863e-06, "loss": 2.1147958755493166, "step": 98610 }, { "epoch": 0.7960737147146905, "grad_norm": 0.863916277885437, "learning_rate": 4.08132274662165e-06, "loss": 2.5683349609375, "step": 98620 }, { "epoch": 0.7961544360404575, "grad_norm": 1.0831689834594727, "learning_rate": 4.0797072768834365e-06, "loss": 2.5751216888427733, "step": 98630 }, { "epoch": 0.7962351573662246, "grad_norm": 1.3385145664215088, "learning_rate": 4.078091807145223e-06, "loss": 2.684343719482422, "step": 98640 }, { "epoch": 0.7963158786919916, "grad_norm": 0.8926145434379578, "learning_rate": 4.07647633740701e-06, "loss": 2.81579475402832, "step": 98650 }, { "epoch": 0.7963966000177587, "grad_norm": 0.8415090441703796, "learning_rate": 4.074860867668797e-06, "loss": 2.397185134887695, "step": 98660 }, { "epoch": 0.7964773213435258, "grad_norm": 1.4744608402252197, "learning_rate": 4.073245397930584e-06, "loss": 2.7163921356201173, "step": 98670 }, { "epoch": 0.7965580426692928, "grad_norm": 1.341937780380249, "learning_rate": 4.071629928192371e-06, "loss": 2.8931779861450195, "step": 98680 }, { "epoch": 0.7966387639950598, "grad_norm": 0.7573972344398499, "learning_rate": 4.070014458454158e-06, "loss": 2.2247594833374023, "step": 98690 }, { "epoch": 0.7967194853208269, "grad_norm": 1.7214577198028564, "learning_rate": 4.0683989887159445e-06, "loss": 2.232120704650879, "step": 98700 }, { "epoch": 0.796800206646594, "grad_norm": 1.1930736303329468, "learning_rate": 4.066783518977731e-06, "loss": 2.7375585556030275, "step": 98710 }, { "epoch": 0.796880927972361, "grad_norm": 1.81166672706604, "learning_rate": 4.065168049239518e-06, "loss": 3.0119361877441406, "step": 98720 }, { "epoch": 0.7969616492981281, "grad_norm": 0.8572211265563965, "learning_rate": 4.063552579501305e-06, "loss": 2.4031152725219727, "step": 98730 }, { "epoch": 0.7970423706238952, "grad_norm": 1.3036513328552246, "learning_rate": 4.061937109763092e-06, "loss": 2.6033605575561523, "step": 98740 }, { "epoch": 0.7971230919496621, "grad_norm": 0.7434897422790527, "learning_rate": 4.060321640024879e-06, "loss": 2.6512765884399414, "step": 98750 }, { "epoch": 0.7972038132754292, "grad_norm": 0.8913785815238953, "learning_rate": 4.0587061702866655e-06, "loss": 2.5399663925170897, "step": 98760 }, { "epoch": 0.7972845346011963, "grad_norm": 0.9241696000099182, "learning_rate": 4.057090700548452e-06, "loss": 2.253936767578125, "step": 98770 }, { "epoch": 0.7973652559269634, "grad_norm": 1.2546792030334473, "learning_rate": 4.055475230810239e-06, "loss": 2.14052734375, "step": 98780 }, { "epoch": 0.7974459772527304, "grad_norm": 0.8464639782905579, "learning_rate": 4.053859761072026e-06, "loss": 2.296213912963867, "step": 98790 }, { "epoch": 0.7975266985784975, "grad_norm": 1.4692710638046265, "learning_rate": 4.052244291333813e-06, "loss": 2.058506965637207, "step": 98800 }, { "epoch": 0.7976074199042645, "grad_norm": 1.3006103038787842, "learning_rate": 4.0506288215956e-06, "loss": 2.6454793930053713, "step": 98810 }, { "epoch": 0.7976881412300315, "grad_norm": 0.7693275809288025, "learning_rate": 4.049013351857387e-06, "loss": 2.6081777572631837, "step": 98820 }, { "epoch": 0.7977688625557986, "grad_norm": 0.7346494197845459, "learning_rate": 4.0473978821191734e-06, "loss": 2.4191558837890623, "step": 98830 }, { "epoch": 0.7978495838815657, "grad_norm": 0.6445767283439636, "learning_rate": 4.04578241238096e-06, "loss": 2.092968559265137, "step": 98840 }, { "epoch": 0.7979303052073328, "grad_norm": 0.5604060888290405, "learning_rate": 4.044166942642747e-06, "loss": 2.2130149841308593, "step": 98850 }, { "epoch": 0.7980110265330997, "grad_norm": 0.8002669215202332, "learning_rate": 4.042551472904534e-06, "loss": 2.3135656356811523, "step": 98860 }, { "epoch": 0.7980917478588668, "grad_norm": 0.7468751072883606, "learning_rate": 4.040936003166321e-06, "loss": 2.9754892349243165, "step": 98870 }, { "epoch": 0.7981724691846339, "grad_norm": 1.0023736953735352, "learning_rate": 4.039320533428108e-06, "loss": 2.5235843658447266, "step": 98880 }, { "epoch": 0.7982531905104009, "grad_norm": 0.6005073189735413, "learning_rate": 4.0377050636898945e-06, "loss": 2.346514129638672, "step": 98890 }, { "epoch": 0.798333911836168, "grad_norm": 1.1824722290039062, "learning_rate": 4.036089593951681e-06, "loss": 2.4108692169189454, "step": 98900 }, { "epoch": 0.7984146331619351, "grad_norm": 0.6975908279418945, "learning_rate": 4.034474124213468e-06, "loss": 2.490451431274414, "step": 98910 }, { "epoch": 0.7984953544877021, "grad_norm": 0.7038857936859131, "learning_rate": 4.032858654475255e-06, "loss": 2.726335144042969, "step": 98920 }, { "epoch": 0.7985760758134691, "grad_norm": 1.2106622457504272, "learning_rate": 4.031243184737042e-06, "loss": 2.3456974029541016, "step": 98930 }, { "epoch": 0.7986567971392362, "grad_norm": 1.421882152557373, "learning_rate": 4.029627714998829e-06, "loss": 3.035320281982422, "step": 98940 }, { "epoch": 0.7987375184650033, "grad_norm": 0.9901770949363708, "learning_rate": 4.028012245260616e-06, "loss": 2.5031410217285157, "step": 98950 }, { "epoch": 0.7988182397907704, "grad_norm": 0.5632336139678955, "learning_rate": 4.0263967755224024e-06, "loss": 2.297394371032715, "step": 98960 }, { "epoch": 0.7988989611165374, "grad_norm": 1.1106940507888794, "learning_rate": 4.024781305784189e-06, "loss": 2.6092559814453127, "step": 98970 }, { "epoch": 0.7989796824423044, "grad_norm": 0.962834358215332, "learning_rate": 4.023165836045976e-06, "loss": 2.4452693939208983, "step": 98980 }, { "epoch": 0.7990604037680715, "grad_norm": 0.5270605683326721, "learning_rate": 4.021550366307764e-06, "loss": 2.478558158874512, "step": 98990 }, { "epoch": 0.7991411250938385, "grad_norm": 0.892012357711792, "learning_rate": 4.019934896569551e-06, "loss": 2.597185516357422, "step": 99000 }, { "epoch": 0.7992218464196056, "grad_norm": 0.7855002284049988, "learning_rate": 4.0183194268313375e-06, "loss": 2.5083938598632813, "step": 99010 }, { "epoch": 0.7993025677453727, "grad_norm": 0.9427784085273743, "learning_rate": 4.016703957093124e-06, "loss": 2.3915420532226563, "step": 99020 }, { "epoch": 0.7993832890711398, "grad_norm": 0.8889765739440918, "learning_rate": 4.015088487354911e-06, "loss": 2.290950965881348, "step": 99030 }, { "epoch": 0.7994640103969067, "grad_norm": 1.2066378593444824, "learning_rate": 4.013473017616698e-06, "loss": 2.8040613174438476, "step": 99040 }, { "epoch": 0.7995447317226738, "grad_norm": 0.7843325138092041, "learning_rate": 4.011857547878485e-06, "loss": 2.515118408203125, "step": 99050 }, { "epoch": 0.7996254530484409, "grad_norm": 0.8798744082450867, "learning_rate": 4.010242078140272e-06, "loss": 2.1421173095703123, "step": 99060 }, { "epoch": 0.7997061743742079, "grad_norm": 0.7147968411445618, "learning_rate": 4.008626608402059e-06, "loss": 2.252010536193848, "step": 99070 }, { "epoch": 0.799786895699975, "grad_norm": 1.1351908445358276, "learning_rate": 4.0070111386638454e-06, "loss": 2.3387483596801757, "step": 99080 }, { "epoch": 0.799867617025742, "grad_norm": 0.8691686987876892, "learning_rate": 4.005395668925632e-06, "loss": 2.8239566802978517, "step": 99090 }, { "epoch": 0.7999483383515091, "grad_norm": 0.9442114233970642, "learning_rate": 4.003780199187419e-06, "loss": 3.1499820709228517, "step": 99100 }, { "epoch": 0.8000290596772761, "grad_norm": 1.0351589918136597, "learning_rate": 4.002164729449206e-06, "loss": 2.2286403656005858, "step": 99110 }, { "epoch": 0.8001097810030432, "grad_norm": 0.5999833941459656, "learning_rate": 4.000549259710993e-06, "loss": 2.4856779098510744, "step": 99120 }, { "epoch": 0.8001905023288103, "grad_norm": 1.2988579273223877, "learning_rate": 3.99893378997278e-06, "loss": 2.159880447387695, "step": 99130 }, { "epoch": 0.8002712236545773, "grad_norm": 1.0084953308105469, "learning_rate": 3.9973183202345665e-06, "loss": 2.4950271606445313, "step": 99140 }, { "epoch": 0.8003519449803443, "grad_norm": 0.7890580892562866, "learning_rate": 3.995702850496353e-06, "loss": 2.475062370300293, "step": 99150 }, { "epoch": 0.8004326663061114, "grad_norm": 0.9317099452018738, "learning_rate": 3.99408738075814e-06, "loss": 2.7035495758056642, "step": 99160 }, { "epoch": 0.8005133876318785, "grad_norm": 0.8703113198280334, "learning_rate": 3.992471911019927e-06, "loss": 2.235370635986328, "step": 99170 }, { "epoch": 0.8005941089576455, "grad_norm": 1.1247284412384033, "learning_rate": 3.990856441281714e-06, "loss": 2.249308967590332, "step": 99180 }, { "epoch": 0.8006748302834126, "grad_norm": 0.9515368938446045, "learning_rate": 3.989240971543501e-06, "loss": 2.735738182067871, "step": 99190 }, { "epoch": 0.8007555516091797, "grad_norm": 1.4434176683425903, "learning_rate": 3.987625501805288e-06, "loss": 2.485925483703613, "step": 99200 }, { "epoch": 0.8008362729349466, "grad_norm": 1.2068159580230713, "learning_rate": 3.9860100320670744e-06, "loss": 2.3539012908935546, "step": 99210 }, { "epoch": 0.8009169942607137, "grad_norm": 1.6557337045669556, "learning_rate": 3.984394562328861e-06, "loss": 3.1113412857055662, "step": 99220 }, { "epoch": 0.8009977155864808, "grad_norm": 0.993149995803833, "learning_rate": 3.982779092590648e-06, "loss": 2.486080551147461, "step": 99230 }, { "epoch": 0.8010784369122479, "grad_norm": 0.8954370617866516, "learning_rate": 3.981163622852435e-06, "loss": 2.3508321762084963, "step": 99240 }, { "epoch": 0.8011591582380149, "grad_norm": 0.8662140965461731, "learning_rate": 3.979548153114222e-06, "loss": 2.6721786499023437, "step": 99250 }, { "epoch": 0.801239879563782, "grad_norm": 1.1443747282028198, "learning_rate": 3.977932683376009e-06, "loss": 2.9797380447387694, "step": 99260 }, { "epoch": 0.801320600889549, "grad_norm": 0.8544486165046692, "learning_rate": 3.9763172136377955e-06, "loss": 2.514688491821289, "step": 99270 }, { "epoch": 0.801401322215316, "grad_norm": 0.8113943338394165, "learning_rate": 3.974701743899582e-06, "loss": 2.7515727996826174, "step": 99280 }, { "epoch": 0.8014820435410831, "grad_norm": 0.659696638584137, "learning_rate": 3.973086274161369e-06, "loss": 2.013201904296875, "step": 99290 }, { "epoch": 0.8015627648668502, "grad_norm": 1.1336146593093872, "learning_rate": 3.971470804423157e-06, "loss": 2.228530502319336, "step": 99300 }, { "epoch": 0.8016434861926173, "grad_norm": 1.3495017290115356, "learning_rate": 3.969855334684944e-06, "loss": 2.558847427368164, "step": 99310 }, { "epoch": 0.8017242075183842, "grad_norm": 0.819765031337738, "learning_rate": 3.968239864946731e-06, "loss": 2.295482635498047, "step": 99320 }, { "epoch": 0.8018049288441513, "grad_norm": 1.433532953262329, "learning_rate": 3.966624395208517e-06, "loss": 2.8885976791381838, "step": 99330 }, { "epoch": 0.8018856501699184, "grad_norm": 0.966533362865448, "learning_rate": 3.965008925470304e-06, "loss": 2.3963647842407227, "step": 99340 }, { "epoch": 0.8019663714956854, "grad_norm": 0.5918118357658386, "learning_rate": 3.963393455732091e-06, "loss": 2.5073268890380858, "step": 99350 }, { "epoch": 0.8020470928214525, "grad_norm": 0.7769534587860107, "learning_rate": 3.961777985993878e-06, "loss": 2.563587760925293, "step": 99360 }, { "epoch": 0.8021278141472196, "grad_norm": 0.8664913177490234, "learning_rate": 3.960162516255665e-06, "loss": 2.4643545150756836, "step": 99370 }, { "epoch": 0.8022085354729867, "grad_norm": 2.275522470474243, "learning_rate": 3.958547046517452e-06, "loss": 2.3850326538085938, "step": 99380 }, { "epoch": 0.8022892567987536, "grad_norm": 0.7848871350288391, "learning_rate": 3.9569315767792385e-06, "loss": 2.0911796569824217, "step": 99390 }, { "epoch": 0.8023699781245207, "grad_norm": 2.4568369388580322, "learning_rate": 3.955316107041025e-06, "loss": 2.4010700225830077, "step": 99400 }, { "epoch": 0.8024506994502878, "grad_norm": 0.9277734756469727, "learning_rate": 3.953700637302812e-06, "loss": 2.3860572814941405, "step": 99410 }, { "epoch": 0.8025314207760548, "grad_norm": 0.9601544737815857, "learning_rate": 3.952085167564599e-06, "loss": 3.201374816894531, "step": 99420 }, { "epoch": 0.8026121421018219, "grad_norm": 0.9295246601104736, "learning_rate": 3.950469697826386e-06, "loss": 2.494176483154297, "step": 99430 }, { "epoch": 0.8026928634275889, "grad_norm": 0.7258851528167725, "learning_rate": 3.948854228088173e-06, "loss": 2.498567008972168, "step": 99440 }, { "epoch": 0.802773584753356, "grad_norm": 0.7288070917129517, "learning_rate": 3.9472387583499596e-06, "loss": 2.510834884643555, "step": 99450 }, { "epoch": 0.802854306079123, "grad_norm": 2.187777042388916, "learning_rate": 3.945623288611746e-06, "loss": 2.4113161087036135, "step": 99460 }, { "epoch": 0.8029350274048901, "grad_norm": 1.0757205486297607, "learning_rate": 3.944007818873533e-06, "loss": 2.4674121856689455, "step": 99470 }, { "epoch": 0.8030157487306572, "grad_norm": 0.6025052666664124, "learning_rate": 3.94239234913532e-06, "loss": 2.2825775146484375, "step": 99480 }, { "epoch": 0.8030964700564243, "grad_norm": 1.164801836013794, "learning_rate": 3.940776879397107e-06, "loss": 2.0378646850585938, "step": 99490 }, { "epoch": 0.8031771913821912, "grad_norm": 0.8997806906700134, "learning_rate": 3.939161409658894e-06, "loss": 2.459293556213379, "step": 99500 }, { "epoch": 0.8032579127079583, "grad_norm": 0.7996758222579956, "learning_rate": 3.937545939920681e-06, "loss": 1.9181482315063476, "step": 99510 }, { "epoch": 0.8033386340337254, "grad_norm": 1.051781415939331, "learning_rate": 3.9359304701824675e-06, "loss": 2.530696487426758, "step": 99520 }, { "epoch": 0.8034193553594924, "grad_norm": 0.6507453918457031, "learning_rate": 3.934315000444254e-06, "loss": 2.4755437850952147, "step": 99530 }, { "epoch": 0.8035000766852595, "grad_norm": 0.9553725123405457, "learning_rate": 3.932699530706041e-06, "loss": 2.343908500671387, "step": 99540 }, { "epoch": 0.8035807980110266, "grad_norm": 1.0834935903549194, "learning_rate": 3.931084060967828e-06, "loss": 2.3268003463745117, "step": 99550 }, { "epoch": 0.8036615193367936, "grad_norm": 0.6740466952323914, "learning_rate": 3.929468591229615e-06, "loss": 1.989282989501953, "step": 99560 }, { "epoch": 0.8037422406625606, "grad_norm": 0.821422278881073, "learning_rate": 3.927853121491402e-06, "loss": 2.354655075073242, "step": 99570 }, { "epoch": 0.8038229619883277, "grad_norm": 1.1248369216918945, "learning_rate": 3.9262376517531886e-06, "loss": 2.183466148376465, "step": 99580 }, { "epoch": 0.8039036833140948, "grad_norm": 0.6367834210395813, "learning_rate": 3.924622182014975e-06, "loss": 2.4110740661621093, "step": 99590 }, { "epoch": 0.8039844046398618, "grad_norm": 1.0604819059371948, "learning_rate": 3.923006712276762e-06, "loss": 1.9817512512207032, "step": 99600 }, { "epoch": 0.8040651259656288, "grad_norm": 1.4369182586669922, "learning_rate": 3.921391242538549e-06, "loss": 2.930604362487793, "step": 99610 }, { "epoch": 0.8041458472913959, "grad_norm": 0.7399820685386658, "learning_rate": 3.919775772800337e-06, "loss": 2.4067705154418944, "step": 99620 }, { "epoch": 0.804226568617163, "grad_norm": 0.7986034154891968, "learning_rate": 3.918160303062124e-06, "loss": 2.2713033676147463, "step": 99630 }, { "epoch": 0.80430728994293, "grad_norm": 0.8779634237289429, "learning_rate": 3.9165448333239105e-06, "loss": 2.400582504272461, "step": 99640 }, { "epoch": 0.8043880112686971, "grad_norm": 0.7613940834999084, "learning_rate": 3.914929363585697e-06, "loss": 2.3303401947021483, "step": 99650 }, { "epoch": 0.8044687325944642, "grad_norm": 0.6333419680595398, "learning_rate": 3.913313893847484e-06, "loss": 1.9646009445190429, "step": 99660 }, { "epoch": 0.8045494539202311, "grad_norm": 0.6129809617996216, "learning_rate": 3.911698424109271e-06, "loss": 2.182118034362793, "step": 99670 }, { "epoch": 0.8046301752459982, "grad_norm": 0.7639891505241394, "learning_rate": 3.910082954371058e-06, "loss": 2.5352020263671875, "step": 99680 }, { "epoch": 0.8047108965717653, "grad_norm": 1.6559388637542725, "learning_rate": 3.908467484632845e-06, "loss": 2.3771800994873047, "step": 99690 }, { "epoch": 0.8047916178975324, "grad_norm": 1.8433024883270264, "learning_rate": 3.9068520148946316e-06, "loss": 2.4573646545410157, "step": 99700 }, { "epoch": 0.8048723392232994, "grad_norm": 0.7503751516342163, "learning_rate": 3.905236545156418e-06, "loss": 1.993792724609375, "step": 99710 }, { "epoch": 0.8049530605490665, "grad_norm": 0.9495934247970581, "learning_rate": 3.903621075418205e-06, "loss": 2.4123945236206055, "step": 99720 }, { "epoch": 0.8050337818748335, "grad_norm": 0.9564493894577026, "learning_rate": 3.902005605679992e-06, "loss": 2.4854305267333983, "step": 99730 }, { "epoch": 0.8051145032006005, "grad_norm": 1.24598228931427, "learning_rate": 3.900390135941779e-06, "loss": 2.279680633544922, "step": 99740 }, { "epoch": 0.8051952245263676, "grad_norm": 0.7273526191711426, "learning_rate": 3.898774666203566e-06, "loss": 2.1156286239624023, "step": 99750 }, { "epoch": 0.8052759458521347, "grad_norm": 0.830149233341217, "learning_rate": 3.897159196465353e-06, "loss": 2.304363250732422, "step": 99760 }, { "epoch": 0.8053566671779018, "grad_norm": 1.2568073272705078, "learning_rate": 3.8955437267271395e-06, "loss": 2.5022855758666993, "step": 99770 }, { "epoch": 0.8054373885036687, "grad_norm": 0.6845056414604187, "learning_rate": 3.893928256988926e-06, "loss": 1.924942398071289, "step": 99780 }, { "epoch": 0.8055181098294358, "grad_norm": 1.0622795820236206, "learning_rate": 3.892312787250713e-06, "loss": 2.3610206604003907, "step": 99790 }, { "epoch": 0.8055988311552029, "grad_norm": 1.5329278707504272, "learning_rate": 3.8906973175125e-06, "loss": 2.4407630920410157, "step": 99800 }, { "epoch": 0.8056795524809699, "grad_norm": 0.4854355454444885, "learning_rate": 3.889081847774287e-06, "loss": 2.5401477813720703, "step": 99810 }, { "epoch": 0.805760273806737, "grad_norm": 0.8639071583747864, "learning_rate": 3.887466378036074e-06, "loss": 2.45440673828125, "step": 99820 }, { "epoch": 0.8058409951325041, "grad_norm": 0.5460861325263977, "learning_rate": 3.8858509082978606e-06, "loss": 2.531194877624512, "step": 99830 }, { "epoch": 0.8059217164582712, "grad_norm": 0.6964040994644165, "learning_rate": 3.884235438559647e-06, "loss": 2.842868423461914, "step": 99840 }, { "epoch": 0.8060024377840381, "grad_norm": 1.059173822402954, "learning_rate": 3.882619968821434e-06, "loss": 2.4514772415161135, "step": 99850 }, { "epoch": 0.8060831591098052, "grad_norm": 0.5403340458869934, "learning_rate": 3.881004499083221e-06, "loss": 2.4651208877563477, "step": 99860 }, { "epoch": 0.8061638804355723, "grad_norm": 0.9925677180290222, "learning_rate": 3.879389029345008e-06, "loss": 2.838206100463867, "step": 99870 }, { "epoch": 0.8062446017613393, "grad_norm": 1.5354666709899902, "learning_rate": 3.877773559606795e-06, "loss": 2.3286159515380858, "step": 99880 }, { "epoch": 0.8063253230871064, "grad_norm": 0.8598012328147888, "learning_rate": 3.876158089868582e-06, "loss": 2.604977607727051, "step": 99890 }, { "epoch": 0.8064060444128734, "grad_norm": 0.697341799736023, "learning_rate": 3.8745426201303685e-06, "loss": 2.8165353775024413, "step": 99900 }, { "epoch": 0.8064867657386405, "grad_norm": 0.9460427761077881, "learning_rate": 3.872927150392155e-06, "loss": 2.5445571899414063, "step": 99910 }, { "epoch": 0.8065674870644075, "grad_norm": 1.1053245067596436, "learning_rate": 3.871311680653942e-06, "loss": 2.259646415710449, "step": 99920 }, { "epoch": 0.8066482083901746, "grad_norm": 1.2656443119049072, "learning_rate": 3.869696210915729e-06, "loss": 2.6137670516967773, "step": 99930 }, { "epoch": 0.8067289297159417, "grad_norm": 0.8542507290840149, "learning_rate": 3.868080741177516e-06, "loss": 2.7848419189453124, "step": 99940 }, { "epoch": 0.8068096510417088, "grad_norm": 0.7835139036178589, "learning_rate": 3.866465271439303e-06, "loss": 2.321806526184082, "step": 99950 }, { "epoch": 0.8068903723674757, "grad_norm": 1.1516780853271484, "learning_rate": 3.8648498017010895e-06, "loss": 2.8077810287475584, "step": 99960 }, { "epoch": 0.8069710936932428, "grad_norm": 1.2160855531692505, "learning_rate": 3.863234331962876e-06, "loss": 2.5185483932495116, "step": 99970 }, { "epoch": 0.8070518150190099, "grad_norm": 0.8812885284423828, "learning_rate": 3.861618862224663e-06, "loss": 2.5407663345336915, "step": 99980 }, { "epoch": 0.8071325363447769, "grad_norm": 1.088274598121643, "learning_rate": 3.86000339248645e-06, "loss": 3.090616798400879, "step": 99990 }, { "epoch": 0.807213257670544, "grad_norm": 1.3522486686706543, "learning_rate": 3.858387922748238e-06, "loss": 2.047108268737793, "step": 100000 }, { "epoch": 0.8072939789963111, "grad_norm": 1.3619083166122437, "learning_rate": 3.856772453010025e-06, "loss": 2.3593854904174805, "step": 100010 }, { "epoch": 0.8073747003220781, "grad_norm": 1.5140352249145508, "learning_rate": 3.8551569832718115e-06, "loss": 2.3730648040771483, "step": 100020 }, { "epoch": 0.8074554216478451, "grad_norm": 0.9310379028320312, "learning_rate": 3.853541513533598e-06, "loss": 2.4593849182128906, "step": 100030 }, { "epoch": 0.8075361429736122, "grad_norm": 0.9677391052246094, "learning_rate": 3.851926043795385e-06, "loss": 2.6406534194946287, "step": 100040 }, { "epoch": 0.8076168642993793, "grad_norm": 0.7409820556640625, "learning_rate": 3.850310574057172e-06, "loss": 2.4460308074951174, "step": 100050 }, { "epoch": 0.8076975856251463, "grad_norm": 0.682105541229248, "learning_rate": 3.848695104318959e-06, "loss": 2.451498031616211, "step": 100060 }, { "epoch": 0.8077783069509133, "grad_norm": 1.2746646404266357, "learning_rate": 3.847079634580746e-06, "loss": 2.477401542663574, "step": 100070 }, { "epoch": 0.8078590282766804, "grad_norm": 0.8760353922843933, "learning_rate": 3.8454641648425325e-06, "loss": 2.017189598083496, "step": 100080 }, { "epoch": 0.8079397496024475, "grad_norm": 1.4307183027267456, "learning_rate": 3.843848695104319e-06, "loss": 2.8041349411010743, "step": 100090 }, { "epoch": 0.8080204709282145, "grad_norm": 0.6359219551086426, "learning_rate": 3.842233225366106e-06, "loss": 2.1230371475219725, "step": 100100 }, { "epoch": 0.8081011922539816, "grad_norm": 0.6181296110153198, "learning_rate": 3.840617755627893e-06, "loss": 2.529888725280762, "step": 100110 }, { "epoch": 0.8081819135797487, "grad_norm": 0.7991848587989807, "learning_rate": 3.83900228588968e-06, "loss": 2.772335433959961, "step": 100120 }, { "epoch": 0.8082626349055156, "grad_norm": 1.3912948369979858, "learning_rate": 3.837386816151467e-06, "loss": 2.414133071899414, "step": 100130 }, { "epoch": 0.8083433562312827, "grad_norm": 1.3746670484542847, "learning_rate": 3.835771346413254e-06, "loss": 2.054134750366211, "step": 100140 }, { "epoch": 0.8084240775570498, "grad_norm": 1.064869999885559, "learning_rate": 3.8341558766750405e-06, "loss": 2.444982719421387, "step": 100150 }, { "epoch": 0.8085047988828169, "grad_norm": 0.6405596137046814, "learning_rate": 3.832540406936827e-06, "loss": 2.357575798034668, "step": 100160 }, { "epoch": 0.8085855202085839, "grad_norm": 1.3581368923187256, "learning_rate": 3.830924937198614e-06, "loss": 2.4479928970336915, "step": 100170 }, { "epoch": 0.808666241534351, "grad_norm": 2.0927021503448486, "learning_rate": 3.829309467460401e-06, "loss": 2.61566219329834, "step": 100180 }, { "epoch": 0.808746962860118, "grad_norm": 1.5612667798995972, "learning_rate": 3.827693997722188e-06, "loss": 2.5110374450683595, "step": 100190 }, { "epoch": 0.808827684185885, "grad_norm": 0.7488995790481567, "learning_rate": 3.826078527983975e-06, "loss": 2.8471353530883787, "step": 100200 }, { "epoch": 0.8089084055116521, "grad_norm": 0.8690032958984375, "learning_rate": 3.8244630582457615e-06, "loss": 2.5407878875732424, "step": 100210 }, { "epoch": 0.8089891268374192, "grad_norm": 0.9498918652534485, "learning_rate": 3.822847588507548e-06, "loss": 2.3243740081787108, "step": 100220 }, { "epoch": 0.8090698481631863, "grad_norm": 0.7940405607223511, "learning_rate": 3.821232118769335e-06, "loss": 2.344155120849609, "step": 100230 }, { "epoch": 0.8091505694889533, "grad_norm": 0.8345101475715637, "learning_rate": 3.819616649031123e-06, "loss": 2.9587024688720702, "step": 100240 }, { "epoch": 0.8092312908147203, "grad_norm": 0.6044783592224121, "learning_rate": 3.81800117929291e-06, "loss": 2.549947738647461, "step": 100250 }, { "epoch": 0.8093120121404874, "grad_norm": 1.7681750059127808, "learning_rate": 3.816385709554697e-06, "loss": 2.7186986923217775, "step": 100260 }, { "epoch": 0.8093927334662544, "grad_norm": 0.5100528597831726, "learning_rate": 3.8147702398164835e-06, "loss": 1.9792531967163085, "step": 100270 }, { "epoch": 0.8094734547920215, "grad_norm": 1.1791340112686157, "learning_rate": 3.81315477007827e-06, "loss": 2.443340301513672, "step": 100280 }, { "epoch": 0.8095541761177886, "grad_norm": 0.8635392785072327, "learning_rate": 3.8115393003400567e-06, "loss": 2.542934036254883, "step": 100290 }, { "epoch": 0.8096348974435557, "grad_norm": 0.6600329279899597, "learning_rate": 3.809923830601844e-06, "loss": 2.1368980407714844, "step": 100300 }, { "epoch": 0.8097156187693226, "grad_norm": 1.0581196546554565, "learning_rate": 3.808308360863631e-06, "loss": 2.579632377624512, "step": 100310 }, { "epoch": 0.8097963400950897, "grad_norm": 1.1476123332977295, "learning_rate": 3.8066928911254177e-06, "loss": 2.6446722030639647, "step": 100320 }, { "epoch": 0.8098770614208568, "grad_norm": 0.7727769017219543, "learning_rate": 3.8050774213872045e-06, "loss": 2.092713165283203, "step": 100330 }, { "epoch": 0.8099577827466238, "grad_norm": 1.3601003885269165, "learning_rate": 3.8034619516489914e-06, "loss": 2.401910400390625, "step": 100340 }, { "epoch": 0.8100385040723909, "grad_norm": 0.9635487198829651, "learning_rate": 3.8018464819107782e-06, "loss": 2.4850831985473634, "step": 100350 }, { "epoch": 0.810119225398158, "grad_norm": 0.6072301864624023, "learning_rate": 3.800231012172565e-06, "loss": 3.160215950012207, "step": 100360 }, { "epoch": 0.810199946723925, "grad_norm": 1.0080260038375854, "learning_rate": 3.798615542434352e-06, "loss": 2.09561824798584, "step": 100370 }, { "epoch": 0.810280668049692, "grad_norm": 1.6262425184249878, "learning_rate": 3.7970000726961388e-06, "loss": 2.331893730163574, "step": 100380 }, { "epoch": 0.8103613893754591, "grad_norm": 1.0381296873092651, "learning_rate": 3.7953846029579256e-06, "loss": 2.65771541595459, "step": 100390 }, { "epoch": 0.8104421107012262, "grad_norm": 1.690034031867981, "learning_rate": 3.7937691332197125e-06, "loss": 2.613581657409668, "step": 100400 }, { "epoch": 0.8105228320269932, "grad_norm": 0.6048244833946228, "learning_rate": 3.7921536634814993e-06, "loss": 1.9195425033569335, "step": 100410 }, { "epoch": 0.8106035533527602, "grad_norm": 0.7549740076065063, "learning_rate": 3.790538193743286e-06, "loss": 2.635000801086426, "step": 100420 }, { "epoch": 0.8106842746785273, "grad_norm": 0.9945607781410217, "learning_rate": 3.788922724005073e-06, "loss": 2.6288970947265624, "step": 100430 }, { "epoch": 0.8107649960042944, "grad_norm": 0.664218544960022, "learning_rate": 3.78730725426686e-06, "loss": 2.173229789733887, "step": 100440 }, { "epoch": 0.8108457173300614, "grad_norm": 0.8597804307937622, "learning_rate": 3.7856917845286467e-06, "loss": 2.4998092651367188, "step": 100450 }, { "epoch": 0.8109264386558285, "grad_norm": 1.2827205657958984, "learning_rate": 3.7840763147904335e-06, "loss": 2.7007257461547853, "step": 100460 }, { "epoch": 0.8110071599815956, "grad_norm": 1.1859867572784424, "learning_rate": 3.7824608450522204e-06, "loss": 3.2726558685302733, "step": 100470 }, { "epoch": 0.8110878813073626, "grad_norm": 0.8064234256744385, "learning_rate": 3.7808453753140072e-06, "loss": 2.7752079010009765, "step": 100480 }, { "epoch": 0.8111686026331296, "grad_norm": 0.9000873565673828, "learning_rate": 3.779229905575794e-06, "loss": 2.2561649322509765, "step": 100490 }, { "epoch": 0.8112493239588967, "grad_norm": 0.7128477096557617, "learning_rate": 3.777614435837581e-06, "loss": 2.260914611816406, "step": 100500 }, { "epoch": 0.8113300452846638, "grad_norm": 0.9400056004524231, "learning_rate": 3.7759989660993678e-06, "loss": 2.4450708389282227, "step": 100510 }, { "epoch": 0.8114107666104308, "grad_norm": 1.0537761449813843, "learning_rate": 3.7743834963611546e-06, "loss": 2.2285919189453125, "step": 100520 }, { "epoch": 0.8114914879361979, "grad_norm": 0.9221999049186707, "learning_rate": 3.7727680266229414e-06, "loss": 2.798000526428223, "step": 100530 }, { "epoch": 0.8115722092619649, "grad_norm": 1.058484673500061, "learning_rate": 3.7711525568847283e-06, "loss": 2.386382293701172, "step": 100540 }, { "epoch": 0.811652930587732, "grad_norm": 0.9512701630592346, "learning_rate": 3.769537087146515e-06, "loss": 2.2739381790161133, "step": 100550 }, { "epoch": 0.811733651913499, "grad_norm": 1.0298120975494385, "learning_rate": 3.7679216174083024e-06, "loss": 2.3381723403930663, "step": 100560 }, { "epoch": 0.8118143732392661, "grad_norm": 0.9654929637908936, "learning_rate": 3.7663061476700893e-06, "loss": 2.169791030883789, "step": 100570 }, { "epoch": 0.8118950945650332, "grad_norm": 0.943855345249176, "learning_rate": 3.764690677931876e-06, "loss": 2.1073657989501955, "step": 100580 }, { "epoch": 0.8119758158908001, "grad_norm": 0.6945945024490356, "learning_rate": 3.763075208193663e-06, "loss": 2.247145080566406, "step": 100590 }, { "epoch": 0.8120565372165672, "grad_norm": 0.8757111430168152, "learning_rate": 3.76145973845545e-06, "loss": 2.436937713623047, "step": 100600 }, { "epoch": 0.8121372585423343, "grad_norm": 0.8423963785171509, "learning_rate": 3.7598442687172366e-06, "loss": 2.53930606842041, "step": 100610 }, { "epoch": 0.8122179798681014, "grad_norm": 1.0443363189697266, "learning_rate": 3.7582287989790235e-06, "loss": 2.3088932037353516, "step": 100620 }, { "epoch": 0.8122987011938684, "grad_norm": 1.4641779661178589, "learning_rate": 3.7566133292408103e-06, "loss": 2.1261880874633787, "step": 100630 }, { "epoch": 0.8123794225196355, "grad_norm": 1.0320755243301392, "learning_rate": 3.754997859502597e-06, "loss": 2.442340850830078, "step": 100640 }, { "epoch": 0.8124601438454025, "grad_norm": 0.7883286476135254, "learning_rate": 3.753382389764384e-06, "loss": 2.7694393157958985, "step": 100650 }, { "epoch": 0.8125408651711695, "grad_norm": 0.5282810926437378, "learning_rate": 3.751766920026171e-06, "loss": 2.2256113052368165, "step": 100660 }, { "epoch": 0.8126215864969366, "grad_norm": 1.1791144609451294, "learning_rate": 3.7501514502879577e-06, "loss": 2.2823495864868164, "step": 100670 }, { "epoch": 0.8127023078227037, "grad_norm": 0.8076584935188293, "learning_rate": 3.7485359805497446e-06, "loss": 2.4481273651123048, "step": 100680 }, { "epoch": 0.8127830291484708, "grad_norm": 0.7602617740631104, "learning_rate": 3.7469205108115314e-06, "loss": 2.5092796325683593, "step": 100690 }, { "epoch": 0.8128637504742378, "grad_norm": 0.6224845051765442, "learning_rate": 3.7453050410733182e-06, "loss": 2.4586441040039064, "step": 100700 }, { "epoch": 0.8129444718000048, "grad_norm": 1.2321196794509888, "learning_rate": 3.743689571335105e-06, "loss": 2.917795753479004, "step": 100710 }, { "epoch": 0.8130251931257719, "grad_norm": 2.031310796737671, "learning_rate": 3.742074101596892e-06, "loss": 2.915929412841797, "step": 100720 }, { "epoch": 0.8131059144515389, "grad_norm": 1.5553462505340576, "learning_rate": 3.7404586318586788e-06, "loss": 2.957657814025879, "step": 100730 }, { "epoch": 0.813186635777306, "grad_norm": 0.5986039042472839, "learning_rate": 3.7388431621204656e-06, "loss": 2.322647285461426, "step": 100740 }, { "epoch": 0.8132673571030731, "grad_norm": 0.6360018253326416, "learning_rate": 3.7372276923822525e-06, "loss": 2.6770524978637695, "step": 100750 }, { "epoch": 0.8133480784288402, "grad_norm": 1.6188523769378662, "learning_rate": 3.7356122226440393e-06, "loss": 2.2638271331787108, "step": 100760 }, { "epoch": 0.8134287997546071, "grad_norm": 1.4188083410263062, "learning_rate": 3.733996752905826e-06, "loss": 2.317504119873047, "step": 100770 }, { "epoch": 0.8135095210803742, "grad_norm": 1.76022469997406, "learning_rate": 3.732381283167613e-06, "loss": 2.7501575469970705, "step": 100780 }, { "epoch": 0.8135902424061413, "grad_norm": 0.7009748816490173, "learning_rate": 3.7307658134294e-06, "loss": 2.3578657150268554, "step": 100790 }, { "epoch": 0.8136709637319083, "grad_norm": 1.0132232904434204, "learning_rate": 3.7291503436911867e-06, "loss": 2.3472360610961913, "step": 100800 }, { "epoch": 0.8137516850576754, "grad_norm": 1.2893500328063965, "learning_rate": 3.7275348739529736e-06, "loss": 2.155162811279297, "step": 100810 }, { "epoch": 0.8138324063834425, "grad_norm": 1.3152776956558228, "learning_rate": 3.725919404214761e-06, "loss": 2.8231796264648437, "step": 100820 }, { "epoch": 0.8139131277092095, "grad_norm": 1.2326405048370361, "learning_rate": 3.7243039344765477e-06, "loss": 2.2795526504516603, "step": 100830 }, { "epoch": 0.8139938490349765, "grad_norm": 0.9740943908691406, "learning_rate": 3.7226884647383345e-06, "loss": 2.4664066314697264, "step": 100840 }, { "epoch": 0.8140745703607436, "grad_norm": 1.0882854461669922, "learning_rate": 3.7210729950001214e-06, "loss": 2.355032730102539, "step": 100850 }, { "epoch": 0.8141552916865107, "grad_norm": 1.0973103046417236, "learning_rate": 3.719457525261908e-06, "loss": 2.240022659301758, "step": 100860 }, { "epoch": 0.8142360130122777, "grad_norm": 0.6536450386047363, "learning_rate": 3.7178420555236955e-06, "loss": 2.3433248519897463, "step": 100870 }, { "epoch": 0.8143167343380447, "grad_norm": 0.6695423126220703, "learning_rate": 3.7162265857854823e-06, "loss": 2.476786994934082, "step": 100880 }, { "epoch": 0.8143974556638118, "grad_norm": 1.4374481439590454, "learning_rate": 3.714611116047269e-06, "loss": 2.309944725036621, "step": 100890 }, { "epoch": 0.8144781769895789, "grad_norm": 1.049780249595642, "learning_rate": 3.712995646309056e-06, "loss": 2.7312290191650392, "step": 100900 }, { "epoch": 0.8145588983153459, "grad_norm": 1.4618111848831177, "learning_rate": 3.711380176570843e-06, "loss": 1.9609739303588867, "step": 100910 }, { "epoch": 0.814639619641113, "grad_norm": 0.8162816166877747, "learning_rate": 3.7097647068326297e-06, "loss": 2.198960304260254, "step": 100920 }, { "epoch": 0.8147203409668801, "grad_norm": 1.1761513948440552, "learning_rate": 3.7081492370944165e-06, "loss": 2.4488359451293946, "step": 100930 }, { "epoch": 0.8148010622926471, "grad_norm": 1.183932900428772, "learning_rate": 3.7065337673562034e-06, "loss": 2.9076908111572264, "step": 100940 }, { "epoch": 0.8148817836184141, "grad_norm": 0.5758711695671082, "learning_rate": 3.7049182976179902e-06, "loss": 2.603963851928711, "step": 100950 }, { "epoch": 0.8149625049441812, "grad_norm": 0.9458169937133789, "learning_rate": 3.703302827879777e-06, "loss": 2.6595380783081053, "step": 100960 }, { "epoch": 0.8150432262699483, "grad_norm": 2.092104196548462, "learning_rate": 3.701687358141564e-06, "loss": 2.1210784912109375, "step": 100970 }, { "epoch": 0.8151239475957153, "grad_norm": 1.108140230178833, "learning_rate": 3.7000718884033508e-06, "loss": 2.4084182739257813, "step": 100980 }, { "epoch": 0.8152046689214824, "grad_norm": 0.778377890586853, "learning_rate": 3.6984564186651376e-06, "loss": 2.3638071060180663, "step": 100990 }, { "epoch": 0.8152853902472494, "grad_norm": 1.2513737678527832, "learning_rate": 3.696840948926925e-06, "loss": 2.3948795318603517, "step": 101000 }, { "epoch": 0.8153661115730165, "grad_norm": 1.532837152481079, "learning_rate": 3.6952254791887117e-06, "loss": 2.2629384994506836, "step": 101010 }, { "epoch": 0.8154468328987835, "grad_norm": 0.913145124912262, "learning_rate": 3.6936100094504986e-06, "loss": 2.3756771087646484, "step": 101020 }, { "epoch": 0.8155275542245506, "grad_norm": 0.7358099222183228, "learning_rate": 3.6919945397122854e-06, "loss": 2.1850608825683593, "step": 101030 }, { "epoch": 0.8156082755503177, "grad_norm": 0.6651201248168945, "learning_rate": 3.6903790699740723e-06, "loss": 3.093212127685547, "step": 101040 }, { "epoch": 0.8156889968760846, "grad_norm": 0.956275999546051, "learning_rate": 3.688763600235859e-06, "loss": 2.275537300109863, "step": 101050 }, { "epoch": 0.8157697182018517, "grad_norm": 1.2521132230758667, "learning_rate": 3.687148130497646e-06, "loss": 2.30834846496582, "step": 101060 }, { "epoch": 0.8158504395276188, "grad_norm": 1.6253803968429565, "learning_rate": 3.685532660759433e-06, "loss": 3.412056732177734, "step": 101070 }, { "epoch": 0.8159311608533859, "grad_norm": 0.7306663393974304, "learning_rate": 3.6839171910212197e-06, "loss": 2.848357582092285, "step": 101080 }, { "epoch": 0.8160118821791529, "grad_norm": 0.8813583254814148, "learning_rate": 3.6823017212830065e-06, "loss": 2.148305320739746, "step": 101090 }, { "epoch": 0.81609260350492, "grad_norm": 1.155396580696106, "learning_rate": 3.6806862515447933e-06, "loss": 2.2276418685913084, "step": 101100 }, { "epoch": 0.816173324830687, "grad_norm": 1.0691121816635132, "learning_rate": 3.67907078180658e-06, "loss": 2.2528453826904298, "step": 101110 }, { "epoch": 0.816254046156454, "grad_norm": 1.3055006265640259, "learning_rate": 3.677455312068367e-06, "loss": 2.591379928588867, "step": 101120 }, { "epoch": 0.8163347674822211, "grad_norm": 1.189941644668579, "learning_rate": 3.675839842330154e-06, "loss": 2.555938148498535, "step": 101130 }, { "epoch": 0.8164154888079882, "grad_norm": 0.901233971118927, "learning_rate": 3.6742243725919407e-06, "loss": 2.9762994766235353, "step": 101140 }, { "epoch": 0.8164962101337553, "grad_norm": 1.1965309381484985, "learning_rate": 3.6726089028537276e-06, "loss": 2.624092864990234, "step": 101150 }, { "epoch": 0.8165769314595223, "grad_norm": 4.1100287437438965, "learning_rate": 3.6709934331155144e-06, "loss": 2.7530353546142576, "step": 101160 }, { "epoch": 0.8166576527852893, "grad_norm": 0.942739725112915, "learning_rate": 3.6693779633773013e-06, "loss": 2.706081771850586, "step": 101170 }, { "epoch": 0.8167383741110564, "grad_norm": 0.6787428855895996, "learning_rate": 3.667762493639088e-06, "loss": 2.6341373443603517, "step": 101180 }, { "epoch": 0.8168190954368234, "grad_norm": 0.8619131445884705, "learning_rate": 3.666147023900875e-06, "loss": 2.8157583236694337, "step": 101190 }, { "epoch": 0.8168998167625905, "grad_norm": 0.7854671478271484, "learning_rate": 3.664531554162662e-06, "loss": 2.152760314941406, "step": 101200 }, { "epoch": 0.8169805380883576, "grad_norm": 0.8579937815666199, "learning_rate": 3.6629160844244486e-06, "loss": 2.5343013763427735, "step": 101210 }, { "epoch": 0.8170612594141247, "grad_norm": 2.071411371231079, "learning_rate": 3.6613006146862355e-06, "loss": 2.860113525390625, "step": 101220 }, { "epoch": 0.8171419807398916, "grad_norm": 0.9650925993919373, "learning_rate": 3.6596851449480223e-06, "loss": 2.0225080490112304, "step": 101230 }, { "epoch": 0.8172227020656587, "grad_norm": 1.5225584506988525, "learning_rate": 3.658069675209809e-06, "loss": 2.225231742858887, "step": 101240 }, { "epoch": 0.8173034233914258, "grad_norm": 0.9024606943130493, "learning_rate": 3.656454205471596e-06, "loss": 2.4002933502197266, "step": 101250 }, { "epoch": 0.8173841447171928, "grad_norm": 1.3377574682235718, "learning_rate": 3.6548387357333833e-06, "loss": 2.226771354675293, "step": 101260 }, { "epoch": 0.8174648660429599, "grad_norm": 1.1914687156677246, "learning_rate": 3.65322326599517e-06, "loss": 2.1526006698608398, "step": 101270 }, { "epoch": 0.817545587368727, "grad_norm": 1.0722761154174805, "learning_rate": 3.651607796256957e-06, "loss": 2.350653076171875, "step": 101280 }, { "epoch": 0.817626308694494, "grad_norm": 0.5112090110778809, "learning_rate": 3.649992326518744e-06, "loss": 2.367264747619629, "step": 101290 }, { "epoch": 0.817707030020261, "grad_norm": 1.0953577756881714, "learning_rate": 3.6483768567805307e-06, "loss": 2.1330036163330077, "step": 101300 }, { "epoch": 0.8177877513460281, "grad_norm": 1.0732779502868652, "learning_rate": 3.6467613870423175e-06, "loss": 3.2271396636962892, "step": 101310 }, { "epoch": 0.8178684726717952, "grad_norm": 0.9368796348571777, "learning_rate": 3.6451459173041044e-06, "loss": 2.8105951309204102, "step": 101320 }, { "epoch": 0.8179491939975622, "grad_norm": 0.622528612613678, "learning_rate": 3.6435304475658912e-06, "loss": 2.251646041870117, "step": 101330 }, { "epoch": 0.8180299153233292, "grad_norm": 0.8480221629142761, "learning_rate": 3.641914977827678e-06, "loss": 2.7545562744140626, "step": 101340 }, { "epoch": 0.8181106366490963, "grad_norm": 1.1986948251724243, "learning_rate": 3.640299508089465e-06, "loss": 2.390022850036621, "step": 101350 }, { "epoch": 0.8181913579748634, "grad_norm": 0.7854283452033997, "learning_rate": 3.6386840383512518e-06, "loss": 2.441522407531738, "step": 101360 }, { "epoch": 0.8182720793006304, "grad_norm": 1.01580810546875, "learning_rate": 3.6370685686130386e-06, "loss": 2.2823469161987306, "step": 101370 }, { "epoch": 0.8183528006263975, "grad_norm": 0.7677692174911499, "learning_rate": 3.6354530988748254e-06, "loss": 2.533355140686035, "step": 101380 }, { "epoch": 0.8184335219521646, "grad_norm": 0.7403935194015503, "learning_rate": 3.6338376291366123e-06, "loss": 2.7643579483032226, "step": 101390 }, { "epoch": 0.8185142432779315, "grad_norm": 1.7519986629486084, "learning_rate": 3.632222159398399e-06, "loss": 2.235771369934082, "step": 101400 }, { "epoch": 0.8185949646036986, "grad_norm": 1.5808475017547607, "learning_rate": 3.630606689660186e-06, "loss": 2.401042366027832, "step": 101410 }, { "epoch": 0.8186756859294657, "grad_norm": 1.339707612991333, "learning_rate": 3.628991219921973e-06, "loss": 2.343027687072754, "step": 101420 }, { "epoch": 0.8187564072552328, "grad_norm": 0.941372275352478, "learning_rate": 3.6273757501837597e-06, "loss": 2.7305479049682617, "step": 101430 }, { "epoch": 0.8188371285809998, "grad_norm": 1.1990783214569092, "learning_rate": 3.6257602804455465e-06, "loss": 2.6834989547729493, "step": 101440 }, { "epoch": 0.8189178499067669, "grad_norm": 0.7815537452697754, "learning_rate": 3.6241448107073334e-06, "loss": 2.208610725402832, "step": 101450 }, { "epoch": 0.818998571232534, "grad_norm": 0.7806258201599121, "learning_rate": 3.6225293409691202e-06, "loss": 2.3647333145141602, "step": 101460 }, { "epoch": 0.819079292558301, "grad_norm": 1.1652333736419678, "learning_rate": 3.620913871230907e-06, "loss": 2.5509006500244142, "step": 101470 }, { "epoch": 0.819160013884068, "grad_norm": 1.6752609014511108, "learning_rate": 3.619298401492694e-06, "loss": 2.661898612976074, "step": 101480 }, { "epoch": 0.8192407352098351, "grad_norm": 0.6641468405723572, "learning_rate": 3.6176829317544808e-06, "loss": 2.4603172302246095, "step": 101490 }, { "epoch": 0.8193214565356022, "grad_norm": 1.047021746635437, "learning_rate": 3.6160674620162684e-06, "loss": 2.47229118347168, "step": 101500 }, { "epoch": 0.8194021778613692, "grad_norm": 1.119663953781128, "learning_rate": 3.6144519922780553e-06, "loss": 2.5739566802978517, "step": 101510 }, { "epoch": 0.8194828991871362, "grad_norm": 0.9551997780799866, "learning_rate": 3.612836522539842e-06, "loss": 2.099874496459961, "step": 101520 }, { "epoch": 0.8195636205129033, "grad_norm": 1.0952858924865723, "learning_rate": 3.611221052801629e-06, "loss": 2.4812952041625977, "step": 101530 }, { "epoch": 0.8196443418386704, "grad_norm": 0.38933396339416504, "learning_rate": 3.609605583063416e-06, "loss": 2.300105857849121, "step": 101540 }, { "epoch": 0.8197250631644374, "grad_norm": 0.7934264540672302, "learning_rate": 3.6079901133252027e-06, "loss": 2.366348457336426, "step": 101550 }, { "epoch": 0.8198057844902045, "grad_norm": 1.045688509941101, "learning_rate": 3.6063746435869895e-06, "loss": 2.1661907196044923, "step": 101560 }, { "epoch": 0.8198865058159716, "grad_norm": 1.0379719734191895, "learning_rate": 3.6047591738487764e-06, "loss": 2.321464729309082, "step": 101570 }, { "epoch": 0.8199672271417385, "grad_norm": 0.6998344659805298, "learning_rate": 3.603143704110563e-06, "loss": 2.250047492980957, "step": 101580 }, { "epoch": 0.8200479484675056, "grad_norm": 0.9771400690078735, "learning_rate": 3.60152823437235e-06, "loss": 2.704219436645508, "step": 101590 }, { "epoch": 0.8201286697932727, "grad_norm": 0.8359363675117493, "learning_rate": 3.599912764634137e-06, "loss": 2.454627799987793, "step": 101600 }, { "epoch": 0.8202093911190398, "grad_norm": 0.7344210147857666, "learning_rate": 3.5982972948959237e-06, "loss": 2.6329586029052736, "step": 101610 }, { "epoch": 0.8202901124448068, "grad_norm": 0.6433290839195251, "learning_rate": 3.5966818251577106e-06, "loss": 2.117367172241211, "step": 101620 }, { "epoch": 0.8203708337705738, "grad_norm": 1.1889715194702148, "learning_rate": 3.5950663554194974e-06, "loss": 2.309304618835449, "step": 101630 }, { "epoch": 0.8204515550963409, "grad_norm": 1.026530385017395, "learning_rate": 3.5934508856812843e-06, "loss": 2.831507682800293, "step": 101640 }, { "epoch": 0.8205322764221079, "grad_norm": 0.8924465775489807, "learning_rate": 3.591835415943071e-06, "loss": 2.7031682968139648, "step": 101650 }, { "epoch": 0.820612997747875, "grad_norm": 1.0845056772232056, "learning_rate": 3.590219946204858e-06, "loss": 2.545582580566406, "step": 101660 }, { "epoch": 0.8206937190736421, "grad_norm": 0.8782886862754822, "learning_rate": 3.588604476466645e-06, "loss": 2.3687694549560545, "step": 101670 }, { "epoch": 0.8207744403994092, "grad_norm": 1.2078533172607422, "learning_rate": 3.5869890067284317e-06, "loss": 2.6058574676513673, "step": 101680 }, { "epoch": 0.8208551617251761, "grad_norm": 0.9698945879936218, "learning_rate": 3.585373536990219e-06, "loss": 2.188273239135742, "step": 101690 }, { "epoch": 0.8209358830509432, "grad_norm": 0.5997578501701355, "learning_rate": 3.5837580672520058e-06, "loss": 2.2443017959594727, "step": 101700 }, { "epoch": 0.8210166043767103, "grad_norm": 1.396471619606018, "learning_rate": 3.5821425975137926e-06, "loss": 2.714933395385742, "step": 101710 }, { "epoch": 0.8210973257024773, "grad_norm": 1.15791654586792, "learning_rate": 3.5805271277755795e-06, "loss": 2.1713140487670897, "step": 101720 }, { "epoch": 0.8211780470282444, "grad_norm": 0.5749796032905579, "learning_rate": 3.5789116580373663e-06, "loss": 2.46215877532959, "step": 101730 }, { "epoch": 0.8212587683540115, "grad_norm": 0.7398450970649719, "learning_rate": 3.577296188299153e-06, "loss": 2.5246442794799804, "step": 101740 }, { "epoch": 0.8213394896797785, "grad_norm": 0.6550983786582947, "learning_rate": 3.57568071856094e-06, "loss": 2.103005599975586, "step": 101750 }, { "epoch": 0.8214202110055455, "grad_norm": 0.5792755484580994, "learning_rate": 3.574065248822727e-06, "loss": 2.2408639907836916, "step": 101760 }, { "epoch": 0.8215009323313126, "grad_norm": 0.6619178652763367, "learning_rate": 3.5724497790845137e-06, "loss": 2.432642364501953, "step": 101770 }, { "epoch": 0.8215816536570797, "grad_norm": 0.9510153532028198, "learning_rate": 3.5708343093463005e-06, "loss": 2.4115455627441404, "step": 101780 }, { "epoch": 0.8216623749828467, "grad_norm": 0.8114317655563354, "learning_rate": 3.5692188396080874e-06, "loss": 2.586964988708496, "step": 101790 }, { "epoch": 0.8217430963086138, "grad_norm": 1.0415517091751099, "learning_rate": 3.5676033698698742e-06, "loss": 2.581037139892578, "step": 101800 }, { "epoch": 0.8218238176343808, "grad_norm": 1.646180272102356, "learning_rate": 3.565987900131661e-06, "loss": 2.7723224639892576, "step": 101810 }, { "epoch": 0.8219045389601479, "grad_norm": 0.8405651450157166, "learning_rate": 3.564372430393448e-06, "loss": 2.3082416534423826, "step": 101820 }, { "epoch": 0.8219852602859149, "grad_norm": 0.9146290421485901, "learning_rate": 3.5627569606552348e-06, "loss": 2.0099077224731445, "step": 101830 }, { "epoch": 0.822065981611682, "grad_norm": 0.9403864145278931, "learning_rate": 3.5611414909170216e-06, "loss": 2.085661697387695, "step": 101840 }, { "epoch": 0.8221467029374491, "grad_norm": 1.2893527746200562, "learning_rate": 3.5595260211788085e-06, "loss": 2.4998096466064452, "step": 101850 }, { "epoch": 0.822227424263216, "grad_norm": 0.5614360570907593, "learning_rate": 3.5579105514405953e-06, "loss": 2.534591865539551, "step": 101860 }, { "epoch": 0.8223081455889831, "grad_norm": 0.7772709131240845, "learning_rate": 3.556295081702382e-06, "loss": 2.1576446533203124, "step": 101870 }, { "epoch": 0.8223888669147502, "grad_norm": 0.5977327823638916, "learning_rate": 3.554679611964169e-06, "loss": 2.444369888305664, "step": 101880 }, { "epoch": 0.8224695882405173, "grad_norm": 1.2858927249908447, "learning_rate": 3.553064142225956e-06, "loss": 2.6474325180053713, "step": 101890 }, { "epoch": 0.8225503095662843, "grad_norm": 1.0370701551437378, "learning_rate": 3.5514486724877427e-06, "loss": 2.8571678161621095, "step": 101900 }, { "epoch": 0.8226310308920514, "grad_norm": 0.7662781476974487, "learning_rate": 3.5498332027495295e-06, "loss": 2.0275348663330077, "step": 101910 }, { "epoch": 0.8227117522178184, "grad_norm": 1.6583898067474365, "learning_rate": 3.5482177330113164e-06, "loss": 2.511794090270996, "step": 101920 }, { "epoch": 0.8227924735435855, "grad_norm": 1.082042932510376, "learning_rate": 3.5466022632731032e-06, "loss": 2.2355146408081055, "step": 101930 }, { "epoch": 0.8228731948693525, "grad_norm": 0.8229674696922302, "learning_rate": 3.54498679353489e-06, "loss": 2.226434326171875, "step": 101940 }, { "epoch": 0.8229539161951196, "grad_norm": 1.2503763437271118, "learning_rate": 3.5433713237966773e-06, "loss": 2.116694450378418, "step": 101950 }, { "epoch": 0.8230346375208867, "grad_norm": 1.10013747215271, "learning_rate": 3.541755854058464e-06, "loss": 2.232769584655762, "step": 101960 }, { "epoch": 0.8231153588466537, "grad_norm": 0.8992975354194641, "learning_rate": 3.540140384320251e-06, "loss": 2.597612762451172, "step": 101970 }, { "epoch": 0.8231960801724207, "grad_norm": 1.022651195526123, "learning_rate": 3.538524914582038e-06, "loss": 2.6361486434936525, "step": 101980 }, { "epoch": 0.8232768014981878, "grad_norm": 0.6330999732017517, "learning_rate": 3.5369094448438247e-06, "loss": 2.11993408203125, "step": 101990 }, { "epoch": 0.8233575228239549, "grad_norm": 0.8076753616333008, "learning_rate": 3.5352939751056116e-06, "loss": 2.2359813690185546, "step": 102000 }, { "epoch": 0.8234382441497219, "grad_norm": 0.8708412647247314, "learning_rate": 3.5336785053673984e-06, "loss": 2.3495050430297852, "step": 102010 }, { "epoch": 0.823518965475489, "grad_norm": 0.8866798281669617, "learning_rate": 3.5320630356291853e-06, "loss": 2.1154014587402346, "step": 102020 }, { "epoch": 0.8235996868012561, "grad_norm": 1.1853691339492798, "learning_rate": 3.530447565890972e-06, "loss": 2.2904809951782226, "step": 102030 }, { "epoch": 0.823680408127023, "grad_norm": 1.297861933708191, "learning_rate": 3.528832096152759e-06, "loss": 2.483513069152832, "step": 102040 }, { "epoch": 0.8237611294527901, "grad_norm": 0.8153480291366577, "learning_rate": 3.527216626414546e-06, "loss": 2.3879301071166994, "step": 102050 }, { "epoch": 0.8238418507785572, "grad_norm": 2.938776969909668, "learning_rate": 3.5256011566763326e-06, "loss": 2.4554237365722655, "step": 102060 }, { "epoch": 0.8239225721043243, "grad_norm": 0.527430534362793, "learning_rate": 3.5239856869381195e-06, "loss": 2.063783645629883, "step": 102070 }, { "epoch": 0.8240032934300913, "grad_norm": 0.9619781970977783, "learning_rate": 3.5223702171999063e-06, "loss": 2.106190490722656, "step": 102080 }, { "epoch": 0.8240840147558584, "grad_norm": 1.2548789978027344, "learning_rate": 3.520754747461693e-06, "loss": 2.661884880065918, "step": 102090 }, { "epoch": 0.8241647360816254, "grad_norm": 1.0863151550292969, "learning_rate": 3.51913927772348e-06, "loss": 2.5482065200805666, "step": 102100 }, { "epoch": 0.8242454574073924, "grad_norm": 0.6692081689834595, "learning_rate": 3.517523807985267e-06, "loss": 2.5012557983398436, "step": 102110 }, { "epoch": 0.8243261787331595, "grad_norm": 0.4463365077972412, "learning_rate": 3.5159083382470537e-06, "loss": 1.8390907287597655, "step": 102120 }, { "epoch": 0.8244069000589266, "grad_norm": 0.987019956111908, "learning_rate": 3.5142928685088414e-06, "loss": 2.3102251052856446, "step": 102130 }, { "epoch": 0.8244876213846937, "grad_norm": 0.8198930621147156, "learning_rate": 3.5126773987706283e-06, "loss": 1.8896406173706055, "step": 102140 }, { "epoch": 0.8245683427104606, "grad_norm": 0.6950278282165527, "learning_rate": 3.511061929032415e-06, "loss": 2.2121660232543947, "step": 102150 }, { "epoch": 0.8246490640362277, "grad_norm": 0.9260571002960205, "learning_rate": 3.509446459294202e-06, "loss": 2.3974708557128905, "step": 102160 }, { "epoch": 0.8247297853619948, "grad_norm": 0.7388392686843872, "learning_rate": 3.507830989555989e-06, "loss": 2.204533576965332, "step": 102170 }, { "epoch": 0.8248105066877618, "grad_norm": 0.7662608027458191, "learning_rate": 3.5062155198177756e-06, "loss": 2.1362995147705077, "step": 102180 }, { "epoch": 0.8248912280135289, "grad_norm": 0.9419481754302979, "learning_rate": 3.5046000500795625e-06, "loss": 2.370332717895508, "step": 102190 }, { "epoch": 0.824971949339296, "grad_norm": 0.6519653797149658, "learning_rate": 3.5029845803413493e-06, "loss": 2.655915069580078, "step": 102200 }, { "epoch": 0.825052670665063, "grad_norm": 1.4568555355072021, "learning_rate": 3.501369110603136e-06, "loss": 2.5917179107666017, "step": 102210 }, { "epoch": 0.82513339199083, "grad_norm": 1.31035578250885, "learning_rate": 3.499753640864923e-06, "loss": 2.694647789001465, "step": 102220 }, { "epoch": 0.8252141133165971, "grad_norm": 1.1095999479293823, "learning_rate": 3.49813817112671e-06, "loss": 2.6035642623901367, "step": 102230 }, { "epoch": 0.8252948346423642, "grad_norm": 1.676523208618164, "learning_rate": 3.4965227013884967e-06, "loss": 2.4098087310791017, "step": 102240 }, { "epoch": 0.8253755559681312, "grad_norm": 0.8317538499832153, "learning_rate": 3.4949072316502836e-06, "loss": 2.493047332763672, "step": 102250 }, { "epoch": 0.8254562772938983, "grad_norm": 0.8274874687194824, "learning_rate": 3.4932917619120704e-06, "loss": 2.3258535385131838, "step": 102260 }, { "epoch": 0.8255369986196653, "grad_norm": 2.154850721359253, "learning_rate": 3.4916762921738573e-06, "loss": 2.319963836669922, "step": 102270 }, { "epoch": 0.8256177199454324, "grad_norm": 0.948818564414978, "learning_rate": 3.490060822435644e-06, "loss": 2.2063854217529295, "step": 102280 }, { "epoch": 0.8256984412711994, "grad_norm": 0.8725020885467529, "learning_rate": 3.488445352697431e-06, "loss": 2.7802711486816407, "step": 102290 }, { "epoch": 0.8257791625969665, "grad_norm": 0.8865939378738403, "learning_rate": 3.486829882959218e-06, "loss": 2.3883785247802733, "step": 102300 }, { "epoch": 0.8258598839227336, "grad_norm": 0.6497935652732849, "learning_rate": 3.4852144132210046e-06, "loss": 2.136198806762695, "step": 102310 }, { "epoch": 0.8259406052485005, "grad_norm": 0.6731685400009155, "learning_rate": 3.4835989434827915e-06, "loss": 2.327281951904297, "step": 102320 }, { "epoch": 0.8260213265742676, "grad_norm": 1.302485704421997, "learning_rate": 3.4819834737445783e-06, "loss": 2.2894296646118164, "step": 102330 }, { "epoch": 0.8261020479000347, "grad_norm": 1.179386019706726, "learning_rate": 3.480368004006365e-06, "loss": 2.7767799377441404, "step": 102340 }, { "epoch": 0.8261827692258018, "grad_norm": 1.1281741857528687, "learning_rate": 3.478752534268152e-06, "loss": 2.4149051666259767, "step": 102350 }, { "epoch": 0.8262634905515688, "grad_norm": 0.9323462843894958, "learning_rate": 3.477137064529939e-06, "loss": 2.781617546081543, "step": 102360 }, { "epoch": 0.8263442118773359, "grad_norm": 1.3486182689666748, "learning_rate": 3.4755215947917257e-06, "loss": 2.2730236053466797, "step": 102370 }, { "epoch": 0.826424933203103, "grad_norm": 1.1799348592758179, "learning_rate": 3.4739061250535126e-06, "loss": 2.5698537826538086, "step": 102380 }, { "epoch": 0.82650565452887, "grad_norm": 1.7743151187896729, "learning_rate": 3.4722906553153e-06, "loss": 2.836261177062988, "step": 102390 }, { "epoch": 0.826586375854637, "grad_norm": 0.9565390348434448, "learning_rate": 3.4706751855770867e-06, "loss": 2.811817932128906, "step": 102400 }, { "epoch": 0.8266670971804041, "grad_norm": 0.9449748992919922, "learning_rate": 3.4690597158388735e-06, "loss": 2.6786670684814453, "step": 102410 }, { "epoch": 0.8267478185061712, "grad_norm": 1.014727234840393, "learning_rate": 3.4674442461006604e-06, "loss": 1.934260368347168, "step": 102420 }, { "epoch": 0.8268285398319382, "grad_norm": 0.9972559213638306, "learning_rate": 3.465828776362447e-06, "loss": 2.6505369186401366, "step": 102430 }, { "epoch": 0.8269092611577052, "grad_norm": 1.5202730894088745, "learning_rate": 3.464213306624234e-06, "loss": 2.6807722091674804, "step": 102440 }, { "epoch": 0.8269899824834723, "grad_norm": 0.9293633103370667, "learning_rate": 3.462597836886021e-06, "loss": 2.2077606201171873, "step": 102450 }, { "epoch": 0.8270707038092394, "grad_norm": 1.1619682312011719, "learning_rate": 3.4609823671478077e-06, "loss": 2.6647783279418946, "step": 102460 }, { "epoch": 0.8271514251350064, "grad_norm": 1.3992623090744019, "learning_rate": 3.4593668974095946e-06, "loss": 2.897445869445801, "step": 102470 }, { "epoch": 0.8272321464607735, "grad_norm": 0.7701808214187622, "learning_rate": 3.4577514276713814e-06, "loss": 2.2236053466796877, "step": 102480 }, { "epoch": 0.8273128677865406, "grad_norm": 0.9809767007827759, "learning_rate": 3.4561359579331683e-06, "loss": 2.4343976974487305, "step": 102490 }, { "epoch": 0.8273935891123075, "grad_norm": 1.6364346742630005, "learning_rate": 3.454520488194955e-06, "loss": 2.4048391342163087, "step": 102500 }, { "epoch": 0.8274743104380746, "grad_norm": 0.9435386657714844, "learning_rate": 3.452905018456742e-06, "loss": 2.40692195892334, "step": 102510 }, { "epoch": 0.8275550317638417, "grad_norm": 1.8389192819595337, "learning_rate": 3.451289548718529e-06, "loss": 2.943787956237793, "step": 102520 }, { "epoch": 0.8276357530896088, "grad_norm": 0.9488006234169006, "learning_rate": 3.4496740789803157e-06, "loss": 2.7401552200317383, "step": 102530 }, { "epoch": 0.8277164744153758, "grad_norm": 1.0305025577545166, "learning_rate": 3.4480586092421025e-06, "loss": 2.6000823974609375, "step": 102540 }, { "epoch": 0.8277971957411429, "grad_norm": 0.6788678765296936, "learning_rate": 3.4464431395038894e-06, "loss": 2.474343490600586, "step": 102550 }, { "epoch": 0.8278779170669099, "grad_norm": 0.5791396498680115, "learning_rate": 3.444827669765676e-06, "loss": 1.8570009231567384, "step": 102560 }, { "epoch": 0.8279586383926769, "grad_norm": 1.232698917388916, "learning_rate": 3.443212200027463e-06, "loss": 2.0798763275146483, "step": 102570 }, { "epoch": 0.828039359718444, "grad_norm": 0.9114251732826233, "learning_rate": 3.44159673028925e-06, "loss": 2.277114486694336, "step": 102580 }, { "epoch": 0.8281200810442111, "grad_norm": 0.5710298418998718, "learning_rate": 3.4399812605510367e-06, "loss": 2.2296865463256834, "step": 102590 }, { "epoch": 0.8282008023699782, "grad_norm": 1.040858507156372, "learning_rate": 3.4383657908128236e-06, "loss": 2.8116073608398438, "step": 102600 }, { "epoch": 0.8282815236957451, "grad_norm": 0.6204900741577148, "learning_rate": 3.4367503210746104e-06, "loss": 2.4238086700439454, "step": 102610 }, { "epoch": 0.8283622450215122, "grad_norm": 1.2205510139465332, "learning_rate": 3.4351348513363973e-06, "loss": 2.0730838775634766, "step": 102620 }, { "epoch": 0.8284429663472793, "grad_norm": 0.8431887030601501, "learning_rate": 3.433519381598184e-06, "loss": 2.638131523132324, "step": 102630 }, { "epoch": 0.8285236876730463, "grad_norm": 0.6691209673881531, "learning_rate": 3.431903911859971e-06, "loss": 2.4527936935424806, "step": 102640 }, { "epoch": 0.8286044089988134, "grad_norm": 1.3087234497070312, "learning_rate": 3.4302884421217582e-06, "loss": 2.348976707458496, "step": 102650 }, { "epoch": 0.8286851303245805, "grad_norm": 1.258396029472351, "learning_rate": 3.428672972383545e-06, "loss": 2.2992986679077148, "step": 102660 }, { "epoch": 0.8287658516503476, "grad_norm": 0.6299833059310913, "learning_rate": 3.427057502645332e-06, "loss": 2.2015085220336914, "step": 102670 }, { "epoch": 0.8288465729761145, "grad_norm": 0.5740859508514404, "learning_rate": 3.4254420329071188e-06, "loss": 2.569039726257324, "step": 102680 }, { "epoch": 0.8289272943018816, "grad_norm": 0.9833943247795105, "learning_rate": 3.4238265631689056e-06, "loss": 2.1228975296020507, "step": 102690 }, { "epoch": 0.8290080156276487, "grad_norm": 1.031181812286377, "learning_rate": 3.4222110934306925e-06, "loss": 1.9767335891723632, "step": 102700 }, { "epoch": 0.8290887369534157, "grad_norm": 1.1661309003829956, "learning_rate": 3.4205956236924793e-06, "loss": 2.156883430480957, "step": 102710 }, { "epoch": 0.8291694582791828, "grad_norm": 1.1069142818450928, "learning_rate": 3.418980153954266e-06, "loss": 2.5670379638671874, "step": 102720 }, { "epoch": 0.8292501796049498, "grad_norm": 1.1897977590560913, "learning_rate": 3.417364684216053e-06, "loss": 2.4301630020141602, "step": 102730 }, { "epoch": 0.8293309009307169, "grad_norm": 0.8643273115158081, "learning_rate": 3.41574921447784e-06, "loss": 2.2383138656616213, "step": 102740 }, { "epoch": 0.8294116222564839, "grad_norm": 0.8958086371421814, "learning_rate": 3.414133744739627e-06, "loss": 2.3555458068847654, "step": 102750 }, { "epoch": 0.829492343582251, "grad_norm": 1.113021731376648, "learning_rate": 3.412518275001414e-06, "loss": 2.6083370208740235, "step": 102760 }, { "epoch": 0.8295730649080181, "grad_norm": 0.9938234090805054, "learning_rate": 3.410902805263201e-06, "loss": 2.2328258514404298, "step": 102770 }, { "epoch": 0.829653786233785, "grad_norm": 0.7886744141578674, "learning_rate": 3.4092873355249877e-06, "loss": 2.1493316650390626, "step": 102780 }, { "epoch": 0.8297345075595521, "grad_norm": 1.1046581268310547, "learning_rate": 3.4076718657867745e-06, "loss": 2.8002107620239256, "step": 102790 }, { "epoch": 0.8298152288853192, "grad_norm": 0.9092274308204651, "learning_rate": 3.4060563960485613e-06, "loss": 2.785609245300293, "step": 102800 }, { "epoch": 0.8298959502110863, "grad_norm": 1.3202322721481323, "learning_rate": 3.404440926310348e-06, "loss": 3.0142436981201173, "step": 102810 }, { "epoch": 0.8299766715368533, "grad_norm": 0.947398841381073, "learning_rate": 3.4028254565721355e-06, "loss": 2.4124738693237306, "step": 102820 }, { "epoch": 0.8300573928626204, "grad_norm": 2.7563884258270264, "learning_rate": 3.4012099868339223e-06, "loss": 2.7217992782592773, "step": 102830 }, { "epoch": 0.8301381141883875, "grad_norm": 0.9383166432380676, "learning_rate": 3.399594517095709e-06, "loss": 2.199301338195801, "step": 102840 }, { "epoch": 0.8302188355141544, "grad_norm": 0.36268341541290283, "learning_rate": 3.397979047357496e-06, "loss": 2.547418403625488, "step": 102850 }, { "epoch": 0.8302995568399215, "grad_norm": 1.3417242765426636, "learning_rate": 3.396363577619283e-06, "loss": 2.2058025360107423, "step": 102860 }, { "epoch": 0.8303802781656886, "grad_norm": 1.288663387298584, "learning_rate": 3.3947481078810697e-06, "loss": 2.556355285644531, "step": 102870 }, { "epoch": 0.8304609994914557, "grad_norm": 0.45688486099243164, "learning_rate": 3.3931326381428565e-06, "loss": 2.574193572998047, "step": 102880 }, { "epoch": 0.8305417208172227, "grad_norm": 1.1286464929580688, "learning_rate": 3.3915171684046434e-06, "loss": 2.7933835983276367, "step": 102890 }, { "epoch": 0.8306224421429897, "grad_norm": 0.6421712040901184, "learning_rate": 3.3899016986664302e-06, "loss": 2.2367376327514648, "step": 102900 }, { "epoch": 0.8307031634687568, "grad_norm": 0.7755709290504456, "learning_rate": 3.388286228928217e-06, "loss": 2.5768783569335936, "step": 102910 }, { "epoch": 0.8307838847945239, "grad_norm": 1.149490475654602, "learning_rate": 3.386670759190004e-06, "loss": 2.588724708557129, "step": 102920 }, { "epoch": 0.8308646061202909, "grad_norm": 1.0972980260849, "learning_rate": 3.3850552894517908e-06, "loss": 2.585516166687012, "step": 102930 }, { "epoch": 0.830945327446058, "grad_norm": 0.8643388152122498, "learning_rate": 3.3834398197135776e-06, "loss": 2.6417911529541014, "step": 102940 }, { "epoch": 0.8310260487718251, "grad_norm": 3.1564462184906006, "learning_rate": 3.3818243499753645e-06, "loss": 3.150094985961914, "step": 102950 }, { "epoch": 0.831106770097592, "grad_norm": 0.6709712147712708, "learning_rate": 3.3802088802371513e-06, "loss": 2.8258716583251955, "step": 102960 }, { "epoch": 0.8311874914233591, "grad_norm": 1.0190054178237915, "learning_rate": 3.378593410498938e-06, "loss": 2.159407043457031, "step": 102970 }, { "epoch": 0.8312682127491262, "grad_norm": 0.8935858607292175, "learning_rate": 3.376977940760725e-06, "loss": 2.425249481201172, "step": 102980 }, { "epoch": 0.8313489340748933, "grad_norm": 0.6009531021118164, "learning_rate": 3.375362471022512e-06, "loss": 2.4149646759033203, "step": 102990 }, { "epoch": 0.8314296554006603, "grad_norm": 0.6674354672431946, "learning_rate": 3.3737470012842987e-06, "loss": 2.2463891983032225, "step": 103000 }, { "epoch": 0.8315103767264274, "grad_norm": 1.110129952430725, "learning_rate": 3.3721315315460855e-06, "loss": 2.607649040222168, "step": 103010 }, { "epoch": 0.8315910980521944, "grad_norm": 2.2793731689453125, "learning_rate": 3.3705160618078724e-06, "loss": 2.889390563964844, "step": 103020 }, { "epoch": 0.8316718193779614, "grad_norm": 0.797387957572937, "learning_rate": 3.3689005920696592e-06, "loss": 2.5380886077880858, "step": 103030 }, { "epoch": 0.8317525407037285, "grad_norm": 0.9786804914474487, "learning_rate": 3.367285122331446e-06, "loss": 2.4808137893676756, "step": 103040 }, { "epoch": 0.8318332620294956, "grad_norm": 1.3544338941574097, "learning_rate": 3.365669652593233e-06, "loss": 2.7297414779663085, "step": 103050 }, { "epoch": 0.8319139833552627, "grad_norm": 0.8370043635368347, "learning_rate": 3.3640541828550198e-06, "loss": 2.246776580810547, "step": 103060 }, { "epoch": 0.8319947046810297, "grad_norm": 0.8765773773193359, "learning_rate": 3.3624387131168066e-06, "loss": 2.6686559677124024, "step": 103070 }, { "epoch": 0.8320754260067967, "grad_norm": 1.2377251386642456, "learning_rate": 3.360823243378594e-06, "loss": 2.120744514465332, "step": 103080 }, { "epoch": 0.8321561473325638, "grad_norm": 0.8909825086593628, "learning_rate": 3.3592077736403807e-06, "loss": 2.207737922668457, "step": 103090 }, { "epoch": 0.8322368686583308, "grad_norm": 0.8911736011505127, "learning_rate": 3.3575923039021676e-06, "loss": 2.133571243286133, "step": 103100 }, { "epoch": 0.8323175899840979, "grad_norm": 0.5163034796714783, "learning_rate": 3.3559768341639544e-06, "loss": 2.4466930389404298, "step": 103110 }, { "epoch": 0.832398311309865, "grad_norm": 1.0259852409362793, "learning_rate": 3.3543613644257413e-06, "loss": 2.179783821105957, "step": 103120 }, { "epoch": 0.832479032635632, "grad_norm": 0.8728327751159668, "learning_rate": 3.352745894687528e-06, "loss": 2.96490478515625, "step": 103130 }, { "epoch": 0.832559753961399, "grad_norm": 0.8738598823547363, "learning_rate": 3.351130424949315e-06, "loss": 2.2270744323730467, "step": 103140 }, { "epoch": 0.8326404752871661, "grad_norm": 1.2367998361587524, "learning_rate": 3.349514955211102e-06, "loss": 2.456459617614746, "step": 103150 }, { "epoch": 0.8327211966129332, "grad_norm": 0.865297794342041, "learning_rate": 3.3478994854728886e-06, "loss": 3.3120651245117188, "step": 103160 }, { "epoch": 0.8328019179387002, "grad_norm": 0.6217265725135803, "learning_rate": 3.3462840157346755e-06, "loss": 2.403478813171387, "step": 103170 }, { "epoch": 0.8328826392644673, "grad_norm": 0.7417458295822144, "learning_rate": 3.3446685459964623e-06, "loss": 1.8876733779907227, "step": 103180 }, { "epoch": 0.8329633605902343, "grad_norm": 1.091446876525879, "learning_rate": 3.343053076258249e-06, "loss": 2.1878150939941405, "step": 103190 }, { "epoch": 0.8330440819160014, "grad_norm": 0.9527947306632996, "learning_rate": 3.341437606520036e-06, "loss": 2.34133358001709, "step": 103200 }, { "epoch": 0.8331248032417684, "grad_norm": 1.0509932041168213, "learning_rate": 3.339822136781823e-06, "loss": 2.3189712524414063, "step": 103210 }, { "epoch": 0.8332055245675355, "grad_norm": 1.020471453666687, "learning_rate": 3.3382066670436097e-06, "loss": 2.323057174682617, "step": 103220 }, { "epoch": 0.8332862458933026, "grad_norm": 1.2905629873275757, "learning_rate": 3.3365911973053966e-06, "loss": 2.4595291137695314, "step": 103230 }, { "epoch": 0.8333669672190696, "grad_norm": 1.5126522779464722, "learning_rate": 3.3349757275671834e-06, "loss": 2.574546241760254, "step": 103240 }, { "epoch": 0.8334476885448366, "grad_norm": 1.0806021690368652, "learning_rate": 3.3333602578289703e-06, "loss": 2.6361454010009764, "step": 103250 }, { "epoch": 0.8335284098706037, "grad_norm": 1.288131833076477, "learning_rate": 3.331744788090757e-06, "loss": 2.641325759887695, "step": 103260 }, { "epoch": 0.8336091311963708, "grad_norm": 1.465637445449829, "learning_rate": 3.330129318352544e-06, "loss": 2.687735366821289, "step": 103270 }, { "epoch": 0.8336898525221378, "grad_norm": 0.8039804697036743, "learning_rate": 3.328513848614331e-06, "loss": 1.8393220901489258, "step": 103280 }, { "epoch": 0.8337705738479049, "grad_norm": 0.6872288584709167, "learning_rate": 3.3268983788761176e-06, "loss": 2.3254446029663085, "step": 103290 }, { "epoch": 0.833851295173672, "grad_norm": 0.8876940608024597, "learning_rate": 3.3252829091379045e-06, "loss": 2.589876174926758, "step": 103300 }, { "epoch": 0.8339320164994389, "grad_norm": 2.3199334144592285, "learning_rate": 3.3236674393996913e-06, "loss": 2.7588653564453125, "step": 103310 }, { "epoch": 0.834012737825206, "grad_norm": 1.1742684841156006, "learning_rate": 3.322051969661478e-06, "loss": 2.4650009155273436, "step": 103320 }, { "epoch": 0.8340934591509731, "grad_norm": 0.6333427429199219, "learning_rate": 3.320436499923265e-06, "loss": 2.3978855133056642, "step": 103330 }, { "epoch": 0.8341741804767402, "grad_norm": 1.3059558868408203, "learning_rate": 3.3188210301850523e-06, "loss": 2.027419090270996, "step": 103340 }, { "epoch": 0.8342549018025072, "grad_norm": 0.959839940071106, "learning_rate": 3.317205560446839e-06, "loss": 2.5455228805541994, "step": 103350 }, { "epoch": 0.8343356231282743, "grad_norm": 0.7996620535850525, "learning_rate": 3.315590090708626e-06, "loss": 2.6735849380493164, "step": 103360 }, { "epoch": 0.8344163444540413, "grad_norm": 0.5560782551765442, "learning_rate": 3.313974620970413e-06, "loss": 2.384359359741211, "step": 103370 }, { "epoch": 0.8344970657798084, "grad_norm": 1.1771585941314697, "learning_rate": 3.3123591512322e-06, "loss": 2.4219064712524414, "step": 103380 }, { "epoch": 0.8345777871055754, "grad_norm": 1.4198118448257446, "learning_rate": 3.310743681493987e-06, "loss": 2.774355888366699, "step": 103390 }, { "epoch": 0.8346585084313425, "grad_norm": 1.233613133430481, "learning_rate": 3.3091282117557738e-06, "loss": 2.321026611328125, "step": 103400 }, { "epoch": 0.8347392297571096, "grad_norm": 1.22036874294281, "learning_rate": 3.3075127420175606e-06, "loss": 1.9859043121337892, "step": 103410 }, { "epoch": 0.8348199510828765, "grad_norm": 1.3706613779067993, "learning_rate": 3.3058972722793475e-06, "loss": 2.3977329254150392, "step": 103420 }, { "epoch": 0.8349006724086436, "grad_norm": 1.0077658891677856, "learning_rate": 3.3042818025411343e-06, "loss": 2.8328428268432617, "step": 103430 }, { "epoch": 0.8349813937344107, "grad_norm": 0.5953102111816406, "learning_rate": 3.302666332802921e-06, "loss": 2.236467742919922, "step": 103440 }, { "epoch": 0.8350621150601778, "grad_norm": 0.9356125593185425, "learning_rate": 3.301050863064708e-06, "loss": 2.480657958984375, "step": 103450 }, { "epoch": 0.8351428363859448, "grad_norm": 1.5251108407974243, "learning_rate": 3.299435393326495e-06, "loss": 2.3374353408813477, "step": 103460 }, { "epoch": 0.8352235577117119, "grad_norm": 0.8182694315910339, "learning_rate": 3.2978199235882817e-06, "loss": 2.280919075012207, "step": 103470 }, { "epoch": 0.835304279037479, "grad_norm": 0.9087092876434326, "learning_rate": 3.2962044538500685e-06, "loss": 2.681534194946289, "step": 103480 }, { "epoch": 0.8353850003632459, "grad_norm": 0.5827991366386414, "learning_rate": 3.2945889841118554e-06, "loss": 2.38448429107666, "step": 103490 }, { "epoch": 0.835465721689013, "grad_norm": 0.7865458726882935, "learning_rate": 3.2929735143736422e-06, "loss": 2.352130126953125, "step": 103500 }, { "epoch": 0.8355464430147801, "grad_norm": 0.6851180791854858, "learning_rate": 3.291358044635429e-06, "loss": 2.206266975402832, "step": 103510 }, { "epoch": 0.8356271643405472, "grad_norm": 0.8205773234367371, "learning_rate": 3.2897425748972164e-06, "loss": 2.36641845703125, "step": 103520 }, { "epoch": 0.8357078856663142, "grad_norm": 0.9835792183876038, "learning_rate": 3.288127105159003e-06, "loss": 2.5250822067260743, "step": 103530 }, { "epoch": 0.8357886069920812, "grad_norm": 0.8038817048072815, "learning_rate": 3.28651163542079e-06, "loss": 2.53888053894043, "step": 103540 }, { "epoch": 0.8358693283178483, "grad_norm": 0.6820738315582275, "learning_rate": 3.284896165682577e-06, "loss": 2.663854789733887, "step": 103550 }, { "epoch": 0.8359500496436153, "grad_norm": 0.5718092322349548, "learning_rate": 3.2832806959443637e-06, "loss": 2.841356086730957, "step": 103560 }, { "epoch": 0.8360307709693824, "grad_norm": 1.4165951013565063, "learning_rate": 3.2816652262061506e-06, "loss": 2.1434656143188477, "step": 103570 }, { "epoch": 0.8361114922951495, "grad_norm": 0.7051489353179932, "learning_rate": 3.2800497564679374e-06, "loss": 2.3388177871704103, "step": 103580 }, { "epoch": 0.8361922136209166, "grad_norm": 1.093329906463623, "learning_rate": 3.2784342867297243e-06, "loss": 2.507217597961426, "step": 103590 }, { "epoch": 0.8362729349466835, "grad_norm": 0.7595781087875366, "learning_rate": 3.276818816991511e-06, "loss": 2.269728660583496, "step": 103600 }, { "epoch": 0.8363536562724506, "grad_norm": 0.7784021496772766, "learning_rate": 3.275203347253298e-06, "loss": 2.4621089935302733, "step": 103610 }, { "epoch": 0.8364343775982177, "grad_norm": 1.649470567703247, "learning_rate": 3.273587877515085e-06, "loss": 2.33612060546875, "step": 103620 }, { "epoch": 0.8365150989239847, "grad_norm": 0.9069963097572327, "learning_rate": 3.2719724077768717e-06, "loss": 1.9324485778808593, "step": 103630 }, { "epoch": 0.8365958202497518, "grad_norm": 0.8502926230430603, "learning_rate": 3.2703569380386585e-06, "loss": 2.4002918243408202, "step": 103640 }, { "epoch": 0.8366765415755189, "grad_norm": 0.553156316280365, "learning_rate": 3.2687414683004454e-06, "loss": 2.678207206726074, "step": 103650 }, { "epoch": 0.8367572629012859, "grad_norm": 0.6265285015106201, "learning_rate": 3.267125998562232e-06, "loss": 2.3280811309814453, "step": 103660 }, { "epoch": 0.8368379842270529, "grad_norm": 1.1681650876998901, "learning_rate": 3.265510528824019e-06, "loss": 2.768038749694824, "step": 103670 }, { "epoch": 0.83691870555282, "grad_norm": 1.4966237545013428, "learning_rate": 3.263895059085806e-06, "loss": 2.5727716445922852, "step": 103680 }, { "epoch": 0.8369994268785871, "grad_norm": 0.7426947355270386, "learning_rate": 3.2622795893475927e-06, "loss": 2.719812202453613, "step": 103690 }, { "epoch": 0.8370801482043541, "grad_norm": 0.761614978313446, "learning_rate": 3.2606641196093796e-06, "loss": 2.784313774108887, "step": 103700 }, { "epoch": 0.8371608695301211, "grad_norm": 0.5351317524909973, "learning_rate": 3.2590486498711664e-06, "loss": 2.1590662002563477, "step": 103710 }, { "epoch": 0.8372415908558882, "grad_norm": 0.8047211766242981, "learning_rate": 3.2574331801329533e-06, "loss": 2.2114978790283204, "step": 103720 }, { "epoch": 0.8373223121816553, "grad_norm": 1.3744491338729858, "learning_rate": 3.25581771039474e-06, "loss": 2.7413957595825194, "step": 103730 }, { "epoch": 0.8374030335074223, "grad_norm": 0.7387563586235046, "learning_rate": 3.254202240656527e-06, "loss": 2.7519657135009767, "step": 103740 }, { "epoch": 0.8374837548331894, "grad_norm": 0.7409839630126953, "learning_rate": 3.252586770918314e-06, "loss": 2.340580940246582, "step": 103750 }, { "epoch": 0.8375644761589565, "grad_norm": 0.970633864402771, "learning_rate": 3.2509713011801007e-06, "loss": 2.4111743927001954, "step": 103760 }, { "epoch": 0.8376451974847234, "grad_norm": 0.7278106212615967, "learning_rate": 3.2493558314418875e-06, "loss": 2.8917861938476563, "step": 103770 }, { "epoch": 0.8377259188104905, "grad_norm": 1.3396121263504028, "learning_rate": 3.2477403617036748e-06, "loss": 2.0105642318725585, "step": 103780 }, { "epoch": 0.8378066401362576, "grad_norm": 0.7701051235198975, "learning_rate": 3.2461248919654616e-06, "loss": 2.8543937683105467, "step": 103790 }, { "epoch": 0.8378873614620247, "grad_norm": 1.4913095235824585, "learning_rate": 3.2445094222272485e-06, "loss": 2.760467529296875, "step": 103800 }, { "epoch": 0.8379680827877917, "grad_norm": 1.209702491760254, "learning_rate": 3.2428939524890353e-06, "loss": 2.1083499908447267, "step": 103810 }, { "epoch": 0.8380488041135588, "grad_norm": 0.8047317266464233, "learning_rate": 3.241278482750822e-06, "loss": 2.5121225357055663, "step": 103820 }, { "epoch": 0.8381295254393258, "grad_norm": 0.7721013426780701, "learning_rate": 3.239663013012609e-06, "loss": 2.258370208740234, "step": 103830 }, { "epoch": 0.8382102467650928, "grad_norm": 0.9590232372283936, "learning_rate": 3.238047543274396e-06, "loss": 2.117641639709473, "step": 103840 }, { "epoch": 0.8382909680908599, "grad_norm": 0.9597278833389282, "learning_rate": 3.2364320735361827e-06, "loss": 2.2253156661987306, "step": 103850 }, { "epoch": 0.838371689416627, "grad_norm": 0.7802923321723938, "learning_rate": 3.2348166037979695e-06, "loss": 2.17165584564209, "step": 103860 }, { "epoch": 0.8384524107423941, "grad_norm": 0.8218666911125183, "learning_rate": 3.2332011340597564e-06, "loss": 1.9637624740600585, "step": 103870 }, { "epoch": 0.838533132068161, "grad_norm": 1.3081998825073242, "learning_rate": 3.2315856643215432e-06, "loss": 2.956820487976074, "step": 103880 }, { "epoch": 0.8386138533939281, "grad_norm": 1.1351162195205688, "learning_rate": 3.22997019458333e-06, "loss": 2.450306510925293, "step": 103890 }, { "epoch": 0.8386945747196952, "grad_norm": 1.1767500638961792, "learning_rate": 3.228354724845117e-06, "loss": 2.3734462738037108, "step": 103900 }, { "epoch": 0.8387752960454623, "grad_norm": 0.9274628162384033, "learning_rate": 3.2267392551069038e-06, "loss": 1.9524618148803712, "step": 103910 }, { "epoch": 0.8388560173712293, "grad_norm": 0.7625022530555725, "learning_rate": 3.2251237853686906e-06, "loss": 1.9932714462280274, "step": 103920 }, { "epoch": 0.8389367386969964, "grad_norm": 0.7320339679718018, "learning_rate": 3.2235083156304775e-06, "loss": 2.3448465347290037, "step": 103930 }, { "epoch": 0.8390174600227635, "grad_norm": 1.0096880197525024, "learning_rate": 3.2218928458922643e-06, "loss": 2.5105445861816404, "step": 103940 }, { "epoch": 0.8390981813485304, "grad_norm": 1.0540412664413452, "learning_rate": 3.220277376154051e-06, "loss": 2.4469369888305663, "step": 103950 }, { "epoch": 0.8391789026742975, "grad_norm": 0.8740601539611816, "learning_rate": 3.218661906415838e-06, "loss": 1.9076168060302734, "step": 103960 }, { "epoch": 0.8392596240000646, "grad_norm": 1.3925673961639404, "learning_rate": 3.217046436677625e-06, "loss": 2.2364086151123046, "step": 103970 }, { "epoch": 0.8393403453258317, "grad_norm": 0.9042688012123108, "learning_rate": 3.2154309669394117e-06, "loss": 2.155613327026367, "step": 103980 }, { "epoch": 0.8394210666515987, "grad_norm": 1.2561300992965698, "learning_rate": 3.2138154972011985e-06, "loss": 2.471757698059082, "step": 103990 }, { "epoch": 0.8395017879773657, "grad_norm": 0.8432270884513855, "learning_rate": 3.2122000274629854e-06, "loss": 2.3054166793823243, "step": 104000 }, { "epoch": 0.8395825093031328, "grad_norm": 0.7295492887496948, "learning_rate": 3.210584557724773e-06, "loss": 2.713701629638672, "step": 104010 }, { "epoch": 0.8396632306288998, "grad_norm": 1.2224618196487427, "learning_rate": 3.20896908798656e-06, "loss": 2.3129886627197265, "step": 104020 }, { "epoch": 0.8397439519546669, "grad_norm": 0.6482437252998352, "learning_rate": 3.2073536182483468e-06, "loss": 2.260497283935547, "step": 104030 }, { "epoch": 0.839824673280434, "grad_norm": 0.8767191767692566, "learning_rate": 3.2057381485101336e-06, "loss": 2.3922677993774415, "step": 104040 }, { "epoch": 0.8399053946062011, "grad_norm": 1.209026575088501, "learning_rate": 3.2041226787719204e-06, "loss": 2.880940628051758, "step": 104050 }, { "epoch": 0.839986115931968, "grad_norm": 1.2465996742248535, "learning_rate": 3.2025072090337073e-06, "loss": 2.0935503005981446, "step": 104060 }, { "epoch": 0.8400668372577351, "grad_norm": 0.9304888248443604, "learning_rate": 3.200891739295494e-06, "loss": 2.457607078552246, "step": 104070 }, { "epoch": 0.8401475585835022, "grad_norm": 0.9135899543762207, "learning_rate": 3.199276269557281e-06, "loss": 2.2986328125, "step": 104080 }, { "epoch": 0.8402282799092692, "grad_norm": 0.5502814054489136, "learning_rate": 3.197660799819068e-06, "loss": 2.7291717529296875, "step": 104090 }, { "epoch": 0.8403090012350363, "grad_norm": 0.8930718302726746, "learning_rate": 3.1960453300808547e-06, "loss": 2.031650733947754, "step": 104100 }, { "epoch": 0.8403897225608034, "grad_norm": 0.853894829750061, "learning_rate": 3.1944298603426415e-06, "loss": 2.187759780883789, "step": 104110 }, { "epoch": 0.8404704438865704, "grad_norm": 1.1205825805664062, "learning_rate": 3.1928143906044284e-06, "loss": 2.696897506713867, "step": 104120 }, { "epoch": 0.8405511652123374, "grad_norm": 1.4266862869262695, "learning_rate": 3.1911989208662152e-06, "loss": 2.3327198028564453, "step": 104130 }, { "epoch": 0.8406318865381045, "grad_norm": 0.5152284502983093, "learning_rate": 3.189583451128002e-06, "loss": 2.074471092224121, "step": 104140 }, { "epoch": 0.8407126078638716, "grad_norm": 0.6888201832771301, "learning_rate": 3.187967981389789e-06, "loss": 2.4011581420898436, "step": 104150 }, { "epoch": 0.8407933291896386, "grad_norm": 1.2875829935073853, "learning_rate": 3.1863525116515758e-06, "loss": 2.2508020401000977, "step": 104160 }, { "epoch": 0.8408740505154056, "grad_norm": 0.6175938844680786, "learning_rate": 3.1847370419133626e-06, "loss": 2.422939491271973, "step": 104170 }, { "epoch": 0.8409547718411727, "grad_norm": 1.2269870042800903, "learning_rate": 3.1831215721751494e-06, "loss": 2.2090797424316406, "step": 104180 }, { "epoch": 0.8410354931669398, "grad_norm": 1.2522664070129395, "learning_rate": 3.1815061024369363e-06, "loss": 2.780068016052246, "step": 104190 }, { "epoch": 0.8411162144927068, "grad_norm": 0.8008440136909485, "learning_rate": 3.179890632698723e-06, "loss": 1.9326194763183593, "step": 104200 }, { "epoch": 0.8411969358184739, "grad_norm": 0.6861234903335571, "learning_rate": 3.1782751629605104e-06, "loss": 2.7046890258789062, "step": 104210 }, { "epoch": 0.841277657144241, "grad_norm": 0.7461750507354736, "learning_rate": 3.1766596932222972e-06, "loss": 2.0150136947631836, "step": 104220 }, { "epoch": 0.8413583784700079, "grad_norm": 0.8901320695877075, "learning_rate": 3.175044223484084e-06, "loss": 2.2346887588500977, "step": 104230 }, { "epoch": 0.841439099795775, "grad_norm": 0.8606923222541809, "learning_rate": 3.173428753745871e-06, "loss": 2.5590230941772463, "step": 104240 }, { "epoch": 0.8415198211215421, "grad_norm": 1.2812696695327759, "learning_rate": 3.1718132840076578e-06, "loss": 2.5572336196899412, "step": 104250 }, { "epoch": 0.8416005424473092, "grad_norm": 0.9426363706588745, "learning_rate": 3.1701978142694446e-06, "loss": 2.15877742767334, "step": 104260 }, { "epoch": 0.8416812637730762, "grad_norm": 0.9180253744125366, "learning_rate": 3.1685823445312315e-06, "loss": 1.9707691192626953, "step": 104270 }, { "epoch": 0.8417619850988433, "grad_norm": 1.002261757850647, "learning_rate": 3.1669668747930183e-06, "loss": 3.043814849853516, "step": 104280 }, { "epoch": 0.8418427064246103, "grad_norm": 0.9936040043830872, "learning_rate": 3.165351405054805e-06, "loss": 2.4254758834838865, "step": 104290 }, { "epoch": 0.8419234277503773, "grad_norm": 1.079334020614624, "learning_rate": 3.163735935316592e-06, "loss": 2.082159423828125, "step": 104300 }, { "epoch": 0.8420041490761444, "grad_norm": 0.5067598819732666, "learning_rate": 3.162120465578379e-06, "loss": 2.196039009094238, "step": 104310 }, { "epoch": 0.8420848704019115, "grad_norm": 0.7929187417030334, "learning_rate": 3.1605049958401657e-06, "loss": 2.2196258544921874, "step": 104320 }, { "epoch": 0.8421655917276786, "grad_norm": 0.8434842228889465, "learning_rate": 3.1588895261019526e-06, "loss": 2.3051084518432616, "step": 104330 }, { "epoch": 0.8422463130534455, "grad_norm": 1.1824071407318115, "learning_rate": 3.1572740563637394e-06, "loss": 2.297496223449707, "step": 104340 }, { "epoch": 0.8423270343792126, "grad_norm": 1.0056134462356567, "learning_rate": 3.1556585866255262e-06, "loss": 2.8987865447998047, "step": 104350 }, { "epoch": 0.8424077557049797, "grad_norm": 0.6274080872535706, "learning_rate": 3.154043116887313e-06, "loss": 2.222643280029297, "step": 104360 }, { "epoch": 0.8424884770307468, "grad_norm": 1.122206687927246, "learning_rate": 3.1524276471491e-06, "loss": 2.7367084503173826, "step": 104370 }, { "epoch": 0.8425691983565138, "grad_norm": 0.616051971912384, "learning_rate": 3.1508121774108868e-06, "loss": 2.42071418762207, "step": 104380 }, { "epoch": 0.8426499196822809, "grad_norm": 0.660388171672821, "learning_rate": 3.1491967076726736e-06, "loss": 2.302233123779297, "step": 104390 }, { "epoch": 0.842730641008048, "grad_norm": 0.7469543218612671, "learning_rate": 3.1475812379344605e-06, "loss": 2.4994789123535157, "step": 104400 }, { "epoch": 0.8428113623338149, "grad_norm": 1.004451036453247, "learning_rate": 3.1459657681962473e-06, "loss": 2.603835678100586, "step": 104410 }, { "epoch": 0.842892083659582, "grad_norm": 1.1414129734039307, "learning_rate": 3.144350298458034e-06, "loss": 2.6088708877563476, "step": 104420 }, { "epoch": 0.8429728049853491, "grad_norm": 0.984226405620575, "learning_rate": 3.142734828719821e-06, "loss": 2.360591697692871, "step": 104430 }, { "epoch": 0.8430535263111162, "grad_norm": 1.4482866525650024, "learning_rate": 3.141119358981608e-06, "loss": 2.6808422088623045, "step": 104440 }, { "epoch": 0.8431342476368832, "grad_norm": 0.5699906945228577, "learning_rate": 3.1395038892433947e-06, "loss": 1.9931985855102539, "step": 104450 }, { "epoch": 0.8432149689626502, "grad_norm": 1.0034788846969604, "learning_rate": 3.1378884195051815e-06, "loss": 2.4932106018066404, "step": 104460 }, { "epoch": 0.8432956902884173, "grad_norm": 0.8822947144508362, "learning_rate": 3.136272949766969e-06, "loss": 2.7108415603637694, "step": 104470 }, { "epoch": 0.8433764116141843, "grad_norm": 1.6123719215393066, "learning_rate": 3.1346574800287557e-06, "loss": 2.0769739151000977, "step": 104480 }, { "epoch": 0.8434571329399514, "grad_norm": 0.8600870370864868, "learning_rate": 3.1330420102905425e-06, "loss": 2.2100643157958983, "step": 104490 }, { "epoch": 0.8435378542657185, "grad_norm": 0.6437518000602722, "learning_rate": 3.1314265405523294e-06, "loss": 2.166660690307617, "step": 104500 }, { "epoch": 0.8436185755914856, "grad_norm": 0.79758220911026, "learning_rate": 3.129811070814116e-06, "loss": 1.974661636352539, "step": 104510 }, { "epoch": 0.8436992969172525, "grad_norm": 1.0346418619155884, "learning_rate": 3.128195601075903e-06, "loss": 2.8113685607910157, "step": 104520 }, { "epoch": 0.8437800182430196, "grad_norm": 1.6216416358947754, "learning_rate": 3.12658013133769e-06, "loss": 2.4736085891723634, "step": 104530 }, { "epoch": 0.8438607395687867, "grad_norm": 0.9792584180831909, "learning_rate": 3.1249646615994767e-06, "loss": 1.906634521484375, "step": 104540 }, { "epoch": 0.8439414608945537, "grad_norm": 0.9728690981864929, "learning_rate": 3.1233491918612636e-06, "loss": 2.530906867980957, "step": 104550 }, { "epoch": 0.8440221822203208, "grad_norm": 0.6799623966217041, "learning_rate": 3.1217337221230504e-06, "loss": 2.305393028259277, "step": 104560 }, { "epoch": 0.8441029035460879, "grad_norm": 0.8030945062637329, "learning_rate": 3.1201182523848373e-06, "loss": 2.3488393783569337, "step": 104570 }, { "epoch": 0.8441836248718549, "grad_norm": 0.9770193696022034, "learning_rate": 3.118502782646624e-06, "loss": 2.117748832702637, "step": 104580 }, { "epoch": 0.8442643461976219, "grad_norm": 1.3272792100906372, "learning_rate": 3.116887312908411e-06, "loss": 2.253022003173828, "step": 104590 }, { "epoch": 0.844345067523389, "grad_norm": 0.7452824115753174, "learning_rate": 3.115271843170198e-06, "loss": 2.616150665283203, "step": 104600 }, { "epoch": 0.8444257888491561, "grad_norm": 1.0497794151306152, "learning_rate": 3.1136563734319847e-06, "loss": 2.6038469314575194, "step": 104610 }, { "epoch": 0.8445065101749231, "grad_norm": 0.887374222278595, "learning_rate": 3.1120409036937715e-06, "loss": 1.7986618041992188, "step": 104620 }, { "epoch": 0.8445872315006901, "grad_norm": 0.6704338788986206, "learning_rate": 3.1104254339555588e-06, "loss": 2.2178909301757814, "step": 104630 }, { "epoch": 0.8446679528264572, "grad_norm": 1.011211633682251, "learning_rate": 3.1088099642173456e-06, "loss": 2.693271827697754, "step": 104640 }, { "epoch": 0.8447486741522243, "grad_norm": 0.9922680258750916, "learning_rate": 3.107194494479133e-06, "loss": 2.2829139709472654, "step": 104650 }, { "epoch": 0.8448293954779913, "grad_norm": 0.7753508687019348, "learning_rate": 3.1055790247409197e-06, "loss": 2.064223861694336, "step": 104660 }, { "epoch": 0.8449101168037584, "grad_norm": 0.6464439034461975, "learning_rate": 3.1039635550027066e-06, "loss": 2.3827590942382812, "step": 104670 }, { "epoch": 0.8449908381295255, "grad_norm": 0.6936954855918884, "learning_rate": 3.1023480852644934e-06, "loss": 2.250474739074707, "step": 104680 }, { "epoch": 0.8450715594552924, "grad_norm": 1.2483735084533691, "learning_rate": 3.1007326155262803e-06, "loss": 2.8831525802612306, "step": 104690 }, { "epoch": 0.8451522807810595, "grad_norm": 1.6219907999038696, "learning_rate": 3.099117145788067e-06, "loss": 2.6063045501708983, "step": 104700 }, { "epoch": 0.8452330021068266, "grad_norm": 0.9802061319351196, "learning_rate": 3.097501676049854e-06, "loss": 2.2397750854492187, "step": 104710 }, { "epoch": 0.8453137234325937, "grad_norm": 0.7800886034965515, "learning_rate": 3.095886206311641e-06, "loss": 2.4254236221313477, "step": 104720 }, { "epoch": 0.8453944447583607, "grad_norm": 0.749695897102356, "learning_rate": 3.0942707365734276e-06, "loss": 2.261117362976074, "step": 104730 }, { "epoch": 0.8454751660841278, "grad_norm": 0.9328552484512329, "learning_rate": 3.0926552668352145e-06, "loss": 2.419947052001953, "step": 104740 }, { "epoch": 0.8455558874098948, "grad_norm": 1.3499784469604492, "learning_rate": 3.0910397970970013e-06, "loss": 2.2826684951782226, "step": 104750 }, { "epoch": 0.8456366087356618, "grad_norm": 0.6958282589912415, "learning_rate": 3.089424327358788e-06, "loss": 2.160597801208496, "step": 104760 }, { "epoch": 0.8457173300614289, "grad_norm": 1.2392685413360596, "learning_rate": 3.087808857620575e-06, "loss": 2.331979751586914, "step": 104770 }, { "epoch": 0.845798051387196, "grad_norm": 0.8044751286506653, "learning_rate": 3.086193387882362e-06, "loss": 2.386886978149414, "step": 104780 }, { "epoch": 0.8458787727129631, "grad_norm": 0.9322338104248047, "learning_rate": 3.0845779181441487e-06, "loss": 2.124970245361328, "step": 104790 }, { "epoch": 0.84595949403873, "grad_norm": 1.306907057762146, "learning_rate": 3.0829624484059356e-06, "loss": 2.4028461456298826, "step": 104800 }, { "epoch": 0.8460402153644971, "grad_norm": 0.7765279412269592, "learning_rate": 3.0813469786677224e-06, "loss": 2.3668033599853517, "step": 104810 }, { "epoch": 0.8461209366902642, "grad_norm": 0.7205851674079895, "learning_rate": 3.0797315089295093e-06, "loss": 2.1271299362182616, "step": 104820 }, { "epoch": 0.8462016580160313, "grad_norm": 0.4721253216266632, "learning_rate": 3.078116039191296e-06, "loss": 2.481967735290527, "step": 104830 }, { "epoch": 0.8462823793417983, "grad_norm": 1.2268227338790894, "learning_rate": 3.076500569453083e-06, "loss": 2.2179758071899416, "step": 104840 }, { "epoch": 0.8463631006675654, "grad_norm": 0.7788546681404114, "learning_rate": 3.07488509971487e-06, "loss": 2.5079504013061524, "step": 104850 }, { "epoch": 0.8464438219933325, "grad_norm": 0.5862647294998169, "learning_rate": 3.0732696299766566e-06, "loss": 2.8571807861328127, "step": 104860 }, { "epoch": 0.8465245433190994, "grad_norm": 1.1696970462799072, "learning_rate": 3.0716541602384435e-06, "loss": 2.516798973083496, "step": 104870 }, { "epoch": 0.8466052646448665, "grad_norm": 1.1606800556182861, "learning_rate": 3.0700386905002303e-06, "loss": 2.5661869049072266, "step": 104880 }, { "epoch": 0.8466859859706336, "grad_norm": 1.288076400756836, "learning_rate": 3.068423220762017e-06, "loss": 2.4480026245117186, "step": 104890 }, { "epoch": 0.8467667072964007, "grad_norm": 0.9902046322822571, "learning_rate": 3.066807751023804e-06, "loss": 2.6974472045898437, "step": 104900 }, { "epoch": 0.8468474286221677, "grad_norm": 1.6689448356628418, "learning_rate": 3.0651922812855913e-06, "loss": 1.9365331649780273, "step": 104910 }, { "epoch": 0.8469281499479347, "grad_norm": 0.9878016114234924, "learning_rate": 3.063576811547378e-06, "loss": 2.386279487609863, "step": 104920 }, { "epoch": 0.8470088712737018, "grad_norm": 0.6481048464775085, "learning_rate": 3.061961341809165e-06, "loss": 2.2164840698242188, "step": 104930 }, { "epoch": 0.8470895925994688, "grad_norm": 1.8006858825683594, "learning_rate": 3.060345872070952e-06, "loss": 2.422005271911621, "step": 104940 }, { "epoch": 0.8471703139252359, "grad_norm": 1.0223174095153809, "learning_rate": 3.0587304023327387e-06, "loss": 2.4885339736938477, "step": 104950 }, { "epoch": 0.847251035251003, "grad_norm": 2.793286085128784, "learning_rate": 3.0571149325945255e-06, "loss": 2.5509042739868164, "step": 104960 }, { "epoch": 0.8473317565767701, "grad_norm": 0.8857611417770386, "learning_rate": 3.0554994628563124e-06, "loss": 2.166852569580078, "step": 104970 }, { "epoch": 0.847412477902537, "grad_norm": 0.7111566066741943, "learning_rate": 3.0538839931180992e-06, "loss": 2.129180145263672, "step": 104980 }, { "epoch": 0.8474931992283041, "grad_norm": 0.789991557598114, "learning_rate": 3.052268523379886e-06, "loss": 2.3637357711791993, "step": 104990 }, { "epoch": 0.8475739205540712, "grad_norm": 0.592108964920044, "learning_rate": 3.050653053641673e-06, "loss": 1.9124845504760741, "step": 105000 }, { "epoch": 0.8476546418798382, "grad_norm": 0.7711182832717896, "learning_rate": 3.0490375839034598e-06, "loss": 2.137398910522461, "step": 105010 }, { "epoch": 0.8477353632056053, "grad_norm": 1.2855013608932495, "learning_rate": 3.0474221141652466e-06, "loss": 2.463215446472168, "step": 105020 }, { "epoch": 0.8478160845313724, "grad_norm": 0.9846559762954712, "learning_rate": 3.0458066444270334e-06, "loss": 2.203093719482422, "step": 105030 }, { "epoch": 0.8478968058571394, "grad_norm": 1.0598262548446655, "learning_rate": 3.0441911746888203e-06, "loss": 2.5739713668823243, "step": 105040 }, { "epoch": 0.8479775271829064, "grad_norm": 0.8787329792976379, "learning_rate": 3.042575704950607e-06, "loss": 2.2767868041992188, "step": 105050 }, { "epoch": 0.8480582485086735, "grad_norm": 0.7563525438308716, "learning_rate": 3.040960235212394e-06, "loss": 2.1175336837768555, "step": 105060 }, { "epoch": 0.8481389698344406, "grad_norm": 1.0595496892929077, "learning_rate": 3.039344765474181e-06, "loss": 2.389424133300781, "step": 105070 }, { "epoch": 0.8482196911602076, "grad_norm": 0.9663013219833374, "learning_rate": 3.0377292957359677e-06, "loss": 2.4621992111206055, "step": 105080 }, { "epoch": 0.8483004124859747, "grad_norm": 1.226782202720642, "learning_rate": 3.0361138259977545e-06, "loss": 2.312892723083496, "step": 105090 }, { "epoch": 0.8483811338117417, "grad_norm": 0.517297089099884, "learning_rate": 3.0344983562595414e-06, "loss": 3.0438201904296873, "step": 105100 }, { "epoch": 0.8484618551375088, "grad_norm": 0.8135592937469482, "learning_rate": 3.032882886521328e-06, "loss": 2.0520397186279298, "step": 105110 }, { "epoch": 0.8485425764632758, "grad_norm": 0.9977691769599915, "learning_rate": 3.031267416783115e-06, "loss": 2.395132064819336, "step": 105120 }, { "epoch": 0.8486232977890429, "grad_norm": 0.9798172116279602, "learning_rate": 3.029651947044902e-06, "loss": 2.4826955795288086, "step": 105130 }, { "epoch": 0.84870401911481, "grad_norm": 1.057464361190796, "learning_rate": 3.0280364773066887e-06, "loss": 2.118842887878418, "step": 105140 }, { "epoch": 0.848784740440577, "grad_norm": 0.6878368854522705, "learning_rate": 3.0264210075684756e-06, "loss": 2.3506647109985352, "step": 105150 }, { "epoch": 0.848865461766344, "grad_norm": 0.8945212960243225, "learning_rate": 3.024805537830263e-06, "loss": 2.4355005264282226, "step": 105160 }, { "epoch": 0.8489461830921111, "grad_norm": 0.6462516188621521, "learning_rate": 3.0231900680920497e-06, "loss": 2.268960952758789, "step": 105170 }, { "epoch": 0.8490269044178782, "grad_norm": 0.5395233631134033, "learning_rate": 3.0215745983538366e-06, "loss": 2.5794458389282227, "step": 105180 }, { "epoch": 0.8491076257436452, "grad_norm": 1.4202021360397339, "learning_rate": 3.0199591286156234e-06, "loss": 2.8567792892456056, "step": 105190 }, { "epoch": 0.8491883470694123, "grad_norm": 0.7763989567756653, "learning_rate": 3.0183436588774102e-06, "loss": 2.1203693389892577, "step": 105200 }, { "epoch": 0.8492690683951793, "grad_norm": 0.632929265499115, "learning_rate": 3.016728189139197e-06, "loss": 2.311207962036133, "step": 105210 }, { "epoch": 0.8493497897209463, "grad_norm": 0.6114556789398193, "learning_rate": 3.015112719400984e-06, "loss": 2.001829147338867, "step": 105220 }, { "epoch": 0.8494305110467134, "grad_norm": 0.9489281177520752, "learning_rate": 3.0134972496627708e-06, "loss": 2.5850114822387695, "step": 105230 }, { "epoch": 0.8495112323724805, "grad_norm": 1.1625065803527832, "learning_rate": 3.0118817799245576e-06, "loss": 2.5754068374633787, "step": 105240 }, { "epoch": 0.8495919536982476, "grad_norm": 0.8348303437232971, "learning_rate": 3.0102663101863445e-06, "loss": 2.507084274291992, "step": 105250 }, { "epoch": 0.8496726750240146, "grad_norm": 1.123368263244629, "learning_rate": 3.0086508404481317e-06, "loss": 2.6158050537109374, "step": 105260 }, { "epoch": 0.8497533963497816, "grad_norm": 0.9940230250358582, "learning_rate": 3.0070353707099186e-06, "loss": 2.241611862182617, "step": 105270 }, { "epoch": 0.8498341176755487, "grad_norm": 1.3208032846450806, "learning_rate": 3.0054199009717054e-06, "loss": 2.7991863250732423, "step": 105280 }, { "epoch": 0.8499148390013157, "grad_norm": 1.7559754848480225, "learning_rate": 3.0038044312334923e-06, "loss": 2.386674499511719, "step": 105290 }, { "epoch": 0.8499955603270828, "grad_norm": 0.4867859482765198, "learning_rate": 3.002188961495279e-06, "loss": 2.1436956405639647, "step": 105300 }, { "epoch": 0.8500762816528499, "grad_norm": 0.5711582899093628, "learning_rate": 3.000573491757066e-06, "loss": 2.1856239318847654, "step": 105310 }, { "epoch": 0.850157002978617, "grad_norm": 0.8986943364143372, "learning_rate": 2.998958022018853e-06, "loss": 2.600709915161133, "step": 105320 }, { "epoch": 0.8502377243043839, "grad_norm": 0.7685781717300415, "learning_rate": 2.9973425522806397e-06, "loss": 2.057308578491211, "step": 105330 }, { "epoch": 0.850318445630151, "grad_norm": 1.0868278741836548, "learning_rate": 2.995727082542427e-06, "loss": 2.6154727935791016, "step": 105340 }, { "epoch": 0.8503991669559181, "grad_norm": 1.1975200176239014, "learning_rate": 2.9941116128042138e-06, "loss": 2.745512008666992, "step": 105350 }, { "epoch": 0.8504798882816852, "grad_norm": 0.7359670996665955, "learning_rate": 2.9924961430660006e-06, "loss": 2.5524223327636717, "step": 105360 }, { "epoch": 0.8505606096074522, "grad_norm": 0.9001826643943787, "learning_rate": 2.9908806733277875e-06, "loss": 2.3988470077514648, "step": 105370 }, { "epoch": 0.8506413309332193, "grad_norm": 0.8486494421958923, "learning_rate": 2.9892652035895743e-06, "loss": 2.471377944946289, "step": 105380 }, { "epoch": 0.8507220522589863, "grad_norm": 0.7032608985900879, "learning_rate": 2.987649733851361e-06, "loss": 2.2774690628051757, "step": 105390 }, { "epoch": 0.8508027735847533, "grad_norm": 0.6026580929756165, "learning_rate": 2.986034264113148e-06, "loss": 1.7612815856933595, "step": 105400 }, { "epoch": 0.8508834949105204, "grad_norm": 0.6637555956840515, "learning_rate": 2.984418794374935e-06, "loss": 2.01940975189209, "step": 105410 }, { "epoch": 0.8509642162362875, "grad_norm": 1.0713051557540894, "learning_rate": 2.9828033246367217e-06, "loss": 2.1913957595825195, "step": 105420 }, { "epoch": 0.8510449375620546, "grad_norm": 0.8090303540229797, "learning_rate": 2.9811878548985085e-06, "loss": 2.351680374145508, "step": 105430 }, { "epoch": 0.8511256588878215, "grad_norm": 0.8009364008903503, "learning_rate": 2.9795723851602954e-06, "loss": 2.062898635864258, "step": 105440 }, { "epoch": 0.8512063802135886, "grad_norm": 1.2046849727630615, "learning_rate": 2.9779569154220822e-06, "loss": 2.1597082138061525, "step": 105450 }, { "epoch": 0.8512871015393557, "grad_norm": 1.0553897619247437, "learning_rate": 2.976341445683869e-06, "loss": 2.405888557434082, "step": 105460 }, { "epoch": 0.8513678228651227, "grad_norm": 0.570982038974762, "learning_rate": 2.974725975945656e-06, "loss": 2.2037153244018555, "step": 105470 }, { "epoch": 0.8514485441908898, "grad_norm": 0.9304159283638, "learning_rate": 2.9731105062074428e-06, "loss": 2.543801498413086, "step": 105480 }, { "epoch": 0.8515292655166569, "grad_norm": 0.9712460041046143, "learning_rate": 2.9714950364692296e-06, "loss": 2.572675323486328, "step": 105490 }, { "epoch": 0.851609986842424, "grad_norm": 1.5517574548721313, "learning_rate": 2.9698795667310165e-06, "loss": 2.6165725708007814, "step": 105500 }, { "epoch": 0.8516907081681909, "grad_norm": 0.8576627373695374, "learning_rate": 2.9682640969928033e-06, "loss": 2.191928672790527, "step": 105510 }, { "epoch": 0.851771429493958, "grad_norm": 1.2115730047225952, "learning_rate": 2.96664862725459e-06, "loss": 2.400003433227539, "step": 105520 }, { "epoch": 0.8518521508197251, "grad_norm": 1.165318250656128, "learning_rate": 2.965033157516377e-06, "loss": 2.4649858474731445, "step": 105530 }, { "epoch": 0.8519328721454921, "grad_norm": 0.7739078402519226, "learning_rate": 2.963417687778164e-06, "loss": 2.4018377304077148, "step": 105540 }, { "epoch": 0.8520135934712592, "grad_norm": 1.4359049797058105, "learning_rate": 2.9618022180399507e-06, "loss": 2.284368324279785, "step": 105550 }, { "epoch": 0.8520943147970262, "grad_norm": 0.7038776874542236, "learning_rate": 2.9601867483017375e-06, "loss": 2.6137584686279296, "step": 105560 }, { "epoch": 0.8521750361227933, "grad_norm": 0.6251943707466125, "learning_rate": 2.9585712785635244e-06, "loss": 2.463099479675293, "step": 105570 }, { "epoch": 0.8522557574485603, "grad_norm": 0.9325305819511414, "learning_rate": 2.9569558088253112e-06, "loss": 2.8032739639282225, "step": 105580 }, { "epoch": 0.8523364787743274, "grad_norm": 1.3310109376907349, "learning_rate": 2.955340339087098e-06, "loss": 2.306658935546875, "step": 105590 }, { "epoch": 0.8524172001000945, "grad_norm": 0.9252294898033142, "learning_rate": 2.9537248693488853e-06, "loss": 2.4026412963867188, "step": 105600 }, { "epoch": 0.8524979214258614, "grad_norm": 0.7952293753623962, "learning_rate": 2.952109399610672e-06, "loss": 2.2522130966186524, "step": 105610 }, { "epoch": 0.8525786427516285, "grad_norm": 2.7918128967285156, "learning_rate": 2.950493929872459e-06, "loss": 2.4676700592041017, "step": 105620 }, { "epoch": 0.8526593640773956, "grad_norm": 0.4863015413284302, "learning_rate": 2.948878460134246e-06, "loss": 1.9856401443481446, "step": 105630 }, { "epoch": 0.8527400854031627, "grad_norm": 1.9414825439453125, "learning_rate": 2.9472629903960327e-06, "loss": 2.6280874252319335, "step": 105640 }, { "epoch": 0.8528208067289297, "grad_norm": 0.6706811189651489, "learning_rate": 2.9456475206578196e-06, "loss": 2.1472942352294924, "step": 105650 }, { "epoch": 0.8529015280546968, "grad_norm": 1.10331392288208, "learning_rate": 2.9440320509196064e-06, "loss": 2.565724754333496, "step": 105660 }, { "epoch": 0.8529822493804639, "grad_norm": 0.8108953833580017, "learning_rate": 2.9424165811813933e-06, "loss": 2.132547950744629, "step": 105670 }, { "epoch": 0.8530629707062308, "grad_norm": 1.378285527229309, "learning_rate": 2.94080111144318e-06, "loss": 2.2656036376953126, "step": 105680 }, { "epoch": 0.8531436920319979, "grad_norm": 0.8368456363677979, "learning_rate": 2.939185641704967e-06, "loss": 2.620387077331543, "step": 105690 }, { "epoch": 0.853224413357765, "grad_norm": 0.6141499280929565, "learning_rate": 2.937570171966754e-06, "loss": 2.399352264404297, "step": 105700 }, { "epoch": 0.8533051346835321, "grad_norm": 1.0393893718719482, "learning_rate": 2.9359547022285406e-06, "loss": 2.7986175537109377, "step": 105710 }, { "epoch": 0.8533858560092991, "grad_norm": 1.1102055311203003, "learning_rate": 2.9343392324903275e-06, "loss": 2.410162353515625, "step": 105720 }, { "epoch": 0.8534665773350661, "grad_norm": 1.2537319660186768, "learning_rate": 2.9327237627521143e-06, "loss": 2.522003936767578, "step": 105730 }, { "epoch": 0.8535472986608332, "grad_norm": 0.7938430905342102, "learning_rate": 2.931108293013901e-06, "loss": 2.224906158447266, "step": 105740 }, { "epoch": 0.8536280199866002, "grad_norm": 0.9575185179710388, "learning_rate": 2.929492823275688e-06, "loss": 2.596244239807129, "step": 105750 }, { "epoch": 0.8537087413123673, "grad_norm": 0.7446215152740479, "learning_rate": 2.927877353537475e-06, "loss": 2.604473114013672, "step": 105760 }, { "epoch": 0.8537894626381344, "grad_norm": 1.5501030683517456, "learning_rate": 2.9262618837992617e-06, "loss": 2.2768156051635744, "step": 105770 }, { "epoch": 0.8538701839639015, "grad_norm": 1.0301917791366577, "learning_rate": 2.9246464140610486e-06, "loss": 2.1515266418457033, "step": 105780 }, { "epoch": 0.8539509052896684, "grad_norm": 0.9249579906463623, "learning_rate": 2.9230309443228354e-06, "loss": 2.7432769775390624, "step": 105790 }, { "epoch": 0.8540316266154355, "grad_norm": 0.857858419418335, "learning_rate": 2.9214154745846223e-06, "loss": 2.4551628112792967, "step": 105800 }, { "epoch": 0.8541123479412026, "grad_norm": 1.5658841133117676, "learning_rate": 2.919800004846409e-06, "loss": 2.264528274536133, "step": 105810 }, { "epoch": 0.8541930692669697, "grad_norm": 0.9509256482124329, "learning_rate": 2.918184535108196e-06, "loss": 2.7450233459472657, "step": 105820 }, { "epoch": 0.8542737905927367, "grad_norm": 0.7721014618873596, "learning_rate": 2.916569065369983e-06, "loss": 2.4875526428222656, "step": 105830 }, { "epoch": 0.8543545119185038, "grad_norm": 0.6438537240028381, "learning_rate": 2.9149535956317696e-06, "loss": 2.3415122985839845, "step": 105840 }, { "epoch": 0.8544352332442708, "grad_norm": 1.1515930891036987, "learning_rate": 2.9133381258935565e-06, "loss": 2.1739044189453125, "step": 105850 }, { "epoch": 0.8545159545700378, "grad_norm": 0.931185245513916, "learning_rate": 2.9117226561553438e-06, "loss": 2.3298952102661135, "step": 105860 }, { "epoch": 0.8545966758958049, "grad_norm": 0.8368271589279175, "learning_rate": 2.9101071864171306e-06, "loss": 2.7910392761230467, "step": 105870 }, { "epoch": 0.854677397221572, "grad_norm": 0.7116407155990601, "learning_rate": 2.9084917166789174e-06, "loss": 2.333097457885742, "step": 105880 }, { "epoch": 0.8547581185473391, "grad_norm": 0.45961666107177734, "learning_rate": 2.9068762469407047e-06, "loss": 2.366242790222168, "step": 105890 }, { "epoch": 0.854838839873106, "grad_norm": 0.7324826717376709, "learning_rate": 2.9052607772024916e-06, "loss": 2.3224489212036135, "step": 105900 }, { "epoch": 0.8549195611988731, "grad_norm": 0.6538263559341431, "learning_rate": 2.9036453074642784e-06, "loss": 2.4787269592285157, "step": 105910 }, { "epoch": 0.8550002825246402, "grad_norm": 0.7140093445777893, "learning_rate": 2.9020298377260653e-06, "loss": 2.324771499633789, "step": 105920 }, { "epoch": 0.8550810038504072, "grad_norm": 0.8404399752616882, "learning_rate": 2.900414367987852e-06, "loss": 2.808279037475586, "step": 105930 }, { "epoch": 0.8551617251761743, "grad_norm": 1.3487098217010498, "learning_rate": 2.898798898249639e-06, "loss": 2.435904693603516, "step": 105940 }, { "epoch": 0.8552424465019414, "grad_norm": 1.0231761932373047, "learning_rate": 2.897183428511426e-06, "loss": 2.3324581146240235, "step": 105950 }, { "epoch": 0.8553231678277085, "grad_norm": 1.0718121528625488, "learning_rate": 2.8955679587732126e-06, "loss": 2.4580183029174805, "step": 105960 }, { "epoch": 0.8554038891534754, "grad_norm": 0.9857177734375, "learning_rate": 2.8939524890349995e-06, "loss": 2.6876169204711915, "step": 105970 }, { "epoch": 0.8554846104792425, "grad_norm": 1.7321350574493408, "learning_rate": 2.8923370192967863e-06, "loss": 2.797519302368164, "step": 105980 }, { "epoch": 0.8555653318050096, "grad_norm": 0.8693088293075562, "learning_rate": 2.890721549558573e-06, "loss": 2.552290916442871, "step": 105990 }, { "epoch": 0.8556460531307766, "grad_norm": 1.080254316329956, "learning_rate": 2.88910607982036e-06, "loss": 2.7162754058837892, "step": 106000 }, { "epoch": 0.8557267744565437, "grad_norm": 0.898438572883606, "learning_rate": 2.887490610082147e-06, "loss": 2.8706809997558596, "step": 106010 }, { "epoch": 0.8558074957823107, "grad_norm": 0.723341166973114, "learning_rate": 2.8858751403439337e-06, "loss": 2.3486730575561525, "step": 106020 }, { "epoch": 0.8558882171080778, "grad_norm": 0.983079195022583, "learning_rate": 2.8842596706057206e-06, "loss": 2.2889640808105467, "step": 106030 }, { "epoch": 0.8559689384338448, "grad_norm": 0.8161702752113342, "learning_rate": 2.882644200867508e-06, "loss": 2.3486249923706053, "step": 106040 }, { "epoch": 0.8560496597596119, "grad_norm": 1.1194158792495728, "learning_rate": 2.8810287311292947e-06, "loss": 2.5796276092529298, "step": 106050 }, { "epoch": 0.856130381085379, "grad_norm": 0.9964965581893921, "learning_rate": 2.8794132613910815e-06, "loss": 2.2494577407836913, "step": 106060 }, { "epoch": 0.856211102411146, "grad_norm": 1.954620361328125, "learning_rate": 2.8777977916528684e-06, "loss": 2.853668212890625, "step": 106070 }, { "epoch": 0.856291823736913, "grad_norm": 0.9431893229484558, "learning_rate": 2.876182321914655e-06, "loss": 2.1365156173706055, "step": 106080 }, { "epoch": 0.8563725450626801, "grad_norm": 0.9391046762466431, "learning_rate": 2.874566852176442e-06, "loss": 2.5110246658325197, "step": 106090 }, { "epoch": 0.8564532663884472, "grad_norm": 0.8407296538352966, "learning_rate": 2.872951382438229e-06, "loss": 2.243199920654297, "step": 106100 }, { "epoch": 0.8565339877142142, "grad_norm": 0.9558990001678467, "learning_rate": 2.8713359127000157e-06, "loss": 2.184372901916504, "step": 106110 }, { "epoch": 0.8566147090399813, "grad_norm": 0.9079674482345581, "learning_rate": 2.8697204429618026e-06, "loss": 1.723611068725586, "step": 106120 }, { "epoch": 0.8566954303657484, "grad_norm": 0.7732642889022827, "learning_rate": 2.8681049732235894e-06, "loss": 2.06951789855957, "step": 106130 }, { "epoch": 0.8567761516915153, "grad_norm": 0.7074983716011047, "learning_rate": 2.8664895034853763e-06, "loss": 2.9248546600341796, "step": 106140 }, { "epoch": 0.8568568730172824, "grad_norm": 1.0544490814208984, "learning_rate": 2.864874033747163e-06, "loss": 2.5921405792236327, "step": 106150 }, { "epoch": 0.8569375943430495, "grad_norm": 0.6367230415344238, "learning_rate": 2.86325856400895e-06, "loss": 2.1859210968017577, "step": 106160 }, { "epoch": 0.8570183156688166, "grad_norm": 1.6206802129745483, "learning_rate": 2.861643094270737e-06, "loss": 2.5842905044555664, "step": 106170 }, { "epoch": 0.8570990369945836, "grad_norm": 1.00640869140625, "learning_rate": 2.8600276245325237e-06, "loss": 2.13587646484375, "step": 106180 }, { "epoch": 0.8571797583203506, "grad_norm": 1.5764058828353882, "learning_rate": 2.8584121547943105e-06, "loss": 2.229209136962891, "step": 106190 }, { "epoch": 0.8572604796461177, "grad_norm": 0.7081831693649292, "learning_rate": 2.8567966850560974e-06, "loss": 2.4037616729736326, "step": 106200 }, { "epoch": 0.8573412009718847, "grad_norm": 1.4165822267532349, "learning_rate": 2.855181215317884e-06, "loss": 2.1514522552490236, "step": 106210 }, { "epoch": 0.8574219222976518, "grad_norm": 0.8067988753318787, "learning_rate": 2.853565745579671e-06, "loss": 2.652046966552734, "step": 106220 }, { "epoch": 0.8575026436234189, "grad_norm": 0.6763224005699158, "learning_rate": 2.851950275841458e-06, "loss": 2.132600784301758, "step": 106230 }, { "epoch": 0.857583364949186, "grad_norm": 0.602592408657074, "learning_rate": 2.8503348061032447e-06, "loss": 2.369576644897461, "step": 106240 }, { "epoch": 0.8576640862749529, "grad_norm": 0.919996976852417, "learning_rate": 2.8487193363650316e-06, "loss": 2.5117366790771483, "step": 106250 }, { "epoch": 0.85774480760072, "grad_norm": 0.7871370911598206, "learning_rate": 2.8471038666268184e-06, "loss": 2.372780418395996, "step": 106260 }, { "epoch": 0.8578255289264871, "grad_norm": 0.7315279245376587, "learning_rate": 2.8454883968886053e-06, "loss": 1.8499324798583985, "step": 106270 }, { "epoch": 0.8579062502522541, "grad_norm": 0.7187543511390686, "learning_rate": 2.843872927150392e-06, "loss": 2.065089225769043, "step": 106280 }, { "epoch": 0.8579869715780212, "grad_norm": 1.3148902654647827, "learning_rate": 2.842257457412179e-06, "loss": 2.5314519882202147, "step": 106290 }, { "epoch": 0.8580676929037883, "grad_norm": 1.4617464542388916, "learning_rate": 2.8406419876739662e-06, "loss": 2.2991405487060548, "step": 106300 }, { "epoch": 0.8581484142295553, "grad_norm": 2.20937180519104, "learning_rate": 2.839026517935753e-06, "loss": 2.5835750579833983, "step": 106310 }, { "epoch": 0.8582291355553223, "grad_norm": 1.268935203552246, "learning_rate": 2.83741104819754e-06, "loss": 2.2201318740844727, "step": 106320 }, { "epoch": 0.8583098568810894, "grad_norm": 0.9911332130432129, "learning_rate": 2.8357955784593268e-06, "loss": 2.480222702026367, "step": 106330 }, { "epoch": 0.8583905782068565, "grad_norm": 1.0045568943023682, "learning_rate": 2.8341801087211136e-06, "loss": 2.1807697296142576, "step": 106340 }, { "epoch": 0.8584712995326236, "grad_norm": 0.5455338358879089, "learning_rate": 2.8325646389829005e-06, "loss": 2.9023624420166017, "step": 106350 }, { "epoch": 0.8585520208583906, "grad_norm": 1.1458468437194824, "learning_rate": 2.8309491692446873e-06, "loss": 2.440175437927246, "step": 106360 }, { "epoch": 0.8586327421841576, "grad_norm": 1.0343035459518433, "learning_rate": 2.829333699506474e-06, "loss": 2.590633773803711, "step": 106370 }, { "epoch": 0.8587134635099247, "grad_norm": 1.2235264778137207, "learning_rate": 2.827718229768261e-06, "loss": 2.513991928100586, "step": 106380 }, { "epoch": 0.8587941848356917, "grad_norm": 0.9090741872787476, "learning_rate": 2.826102760030048e-06, "loss": 2.275765037536621, "step": 106390 }, { "epoch": 0.8588749061614588, "grad_norm": 0.6500800848007202, "learning_rate": 2.8244872902918347e-06, "loss": 2.6461957931518554, "step": 106400 }, { "epoch": 0.8589556274872259, "grad_norm": 0.8303571939468384, "learning_rate": 2.8228718205536215e-06, "loss": 2.1559656143188475, "step": 106410 }, { "epoch": 0.859036348812993, "grad_norm": 0.8723812103271484, "learning_rate": 2.8212563508154084e-06, "loss": 2.823792266845703, "step": 106420 }, { "epoch": 0.8591170701387599, "grad_norm": 0.868888795375824, "learning_rate": 2.8196408810771952e-06, "loss": 2.0335113525390627, "step": 106430 }, { "epoch": 0.859197791464527, "grad_norm": 0.9174028038978577, "learning_rate": 2.818025411338982e-06, "loss": 2.1918067932128906, "step": 106440 }, { "epoch": 0.8592785127902941, "grad_norm": 1.5565105676651, "learning_rate": 2.816409941600769e-06, "loss": 2.969147872924805, "step": 106450 }, { "epoch": 0.8593592341160611, "grad_norm": 1.959604263305664, "learning_rate": 2.8147944718625558e-06, "loss": 2.5371530532836912, "step": 106460 }, { "epoch": 0.8594399554418282, "grad_norm": 1.3872159719467163, "learning_rate": 2.8131790021243426e-06, "loss": 2.203282928466797, "step": 106470 }, { "epoch": 0.8595206767675952, "grad_norm": 0.942584216594696, "learning_rate": 2.8115635323861295e-06, "loss": 2.1999183654785157, "step": 106480 }, { "epoch": 0.8596013980933623, "grad_norm": 0.650032103061676, "learning_rate": 2.8099480626479163e-06, "loss": 2.273737907409668, "step": 106490 }, { "epoch": 0.8596821194191293, "grad_norm": 0.8792474269866943, "learning_rate": 2.808332592909703e-06, "loss": 2.4343191146850587, "step": 106500 }, { "epoch": 0.8597628407448964, "grad_norm": 0.7533249855041504, "learning_rate": 2.80671712317149e-06, "loss": 2.235317611694336, "step": 106510 }, { "epoch": 0.8598435620706635, "grad_norm": 0.6130419373512268, "learning_rate": 2.8051016534332777e-06, "loss": 2.436808395385742, "step": 106520 }, { "epoch": 0.8599242833964305, "grad_norm": 1.1945444345474243, "learning_rate": 2.8034861836950645e-06, "loss": 2.151136779785156, "step": 106530 }, { "epoch": 0.8600050047221975, "grad_norm": 1.0044623613357544, "learning_rate": 2.8018707139568514e-06, "loss": 2.629638671875, "step": 106540 }, { "epoch": 0.8600857260479646, "grad_norm": 0.9098949432373047, "learning_rate": 2.8002552442186382e-06, "loss": 2.2234086990356445, "step": 106550 }, { "epoch": 0.8601664473737317, "grad_norm": 0.9576996564865112, "learning_rate": 2.798639774480425e-06, "loss": 2.197732353210449, "step": 106560 }, { "epoch": 0.8602471686994987, "grad_norm": 0.6597111821174622, "learning_rate": 2.797024304742212e-06, "loss": 2.3793859481811523, "step": 106570 }, { "epoch": 0.8603278900252658, "grad_norm": 1.6127058267593384, "learning_rate": 2.7954088350039988e-06, "loss": 3.222952651977539, "step": 106580 }, { "epoch": 0.8604086113510329, "grad_norm": 0.9504587650299072, "learning_rate": 2.7937933652657856e-06, "loss": 2.928761672973633, "step": 106590 }, { "epoch": 0.8604893326767998, "grad_norm": 0.8891896605491638, "learning_rate": 2.7921778955275725e-06, "loss": 2.3380914688110352, "step": 106600 }, { "epoch": 0.8605700540025669, "grad_norm": 1.4377024173736572, "learning_rate": 2.7905624257893593e-06, "loss": 2.4443923950195314, "step": 106610 }, { "epoch": 0.860650775328334, "grad_norm": 1.2788413763046265, "learning_rate": 2.788946956051146e-06, "loss": 2.3800790786743162, "step": 106620 }, { "epoch": 0.8607314966541011, "grad_norm": 0.6554157733917236, "learning_rate": 2.787331486312933e-06, "loss": 2.9963964462280273, "step": 106630 }, { "epoch": 0.8608122179798681, "grad_norm": 4.003816604614258, "learning_rate": 2.78571601657472e-06, "loss": 2.5348350524902346, "step": 106640 }, { "epoch": 0.8608929393056352, "grad_norm": 0.7873334884643555, "learning_rate": 2.7841005468365067e-06, "loss": 2.5332435607910155, "step": 106650 }, { "epoch": 0.8609736606314022, "grad_norm": 0.6944889426231384, "learning_rate": 2.7824850770982935e-06, "loss": 2.1822179794311523, "step": 106660 }, { "epoch": 0.8610543819571692, "grad_norm": 1.1468673944473267, "learning_rate": 2.7808696073600804e-06, "loss": 2.540242385864258, "step": 106670 }, { "epoch": 0.8611351032829363, "grad_norm": 0.7528722286224365, "learning_rate": 2.7792541376218672e-06, "loss": 2.0469381332397463, "step": 106680 }, { "epoch": 0.8612158246087034, "grad_norm": 0.9502074122428894, "learning_rate": 2.777638667883654e-06, "loss": 2.266611099243164, "step": 106690 }, { "epoch": 0.8612965459344705, "grad_norm": 0.880303144454956, "learning_rate": 2.776023198145441e-06, "loss": 2.3687267303466797, "step": 106700 }, { "epoch": 0.8613772672602374, "grad_norm": 0.9822019934654236, "learning_rate": 2.7744077284072278e-06, "loss": 2.4123817443847657, "step": 106710 }, { "epoch": 0.8614579885860045, "grad_norm": 1.0911890268325806, "learning_rate": 2.7727922586690146e-06, "loss": 2.2294666290283205, "step": 106720 }, { "epoch": 0.8615387099117716, "grad_norm": 0.7609921097755432, "learning_rate": 2.771176788930802e-06, "loss": 2.4693910598754885, "step": 106730 }, { "epoch": 0.8616194312375386, "grad_norm": 0.7367367744445801, "learning_rate": 2.7695613191925887e-06, "loss": 2.3886220932006834, "step": 106740 }, { "epoch": 0.8617001525633057, "grad_norm": 0.6725501418113708, "learning_rate": 2.7679458494543756e-06, "loss": 2.181970405578613, "step": 106750 }, { "epoch": 0.8617808738890728, "grad_norm": 0.8669759631156921, "learning_rate": 2.7663303797161624e-06, "loss": 2.182778549194336, "step": 106760 }, { "epoch": 0.8618615952148398, "grad_norm": 0.7241351008415222, "learning_rate": 2.7647149099779493e-06, "loss": 2.5705005645751955, "step": 106770 }, { "epoch": 0.8619423165406068, "grad_norm": 1.3353145122528076, "learning_rate": 2.763099440239736e-06, "loss": 2.4974948883056642, "step": 106780 }, { "epoch": 0.8620230378663739, "grad_norm": 0.8767736554145813, "learning_rate": 2.761483970501523e-06, "loss": 2.401096153259277, "step": 106790 }, { "epoch": 0.862103759192141, "grad_norm": 0.702675461769104, "learning_rate": 2.75986850076331e-06, "loss": 2.087270736694336, "step": 106800 }, { "epoch": 0.8621844805179081, "grad_norm": 1.5663729906082153, "learning_rate": 2.7582530310250966e-06, "loss": 2.15205078125, "step": 106810 }, { "epoch": 0.862265201843675, "grad_norm": 0.6493465304374695, "learning_rate": 2.7566375612868835e-06, "loss": 2.0682561874389647, "step": 106820 }, { "epoch": 0.8623459231694421, "grad_norm": 1.134114384651184, "learning_rate": 2.7550220915486703e-06, "loss": 2.552178955078125, "step": 106830 }, { "epoch": 0.8624266444952092, "grad_norm": 0.947464644908905, "learning_rate": 2.753406621810457e-06, "loss": 2.9739219665527346, "step": 106840 }, { "epoch": 0.8625073658209762, "grad_norm": 1.0947468280792236, "learning_rate": 2.751791152072244e-06, "loss": 1.9368362426757812, "step": 106850 }, { "epoch": 0.8625880871467433, "grad_norm": 0.6745449304580688, "learning_rate": 2.750175682334031e-06, "loss": 2.2055278778076173, "step": 106860 }, { "epoch": 0.8626688084725104, "grad_norm": 0.8978681564331055, "learning_rate": 2.7485602125958177e-06, "loss": 2.2056556701660157, "step": 106870 }, { "epoch": 0.8627495297982775, "grad_norm": 0.877579391002655, "learning_rate": 2.7469447428576046e-06, "loss": 2.031138801574707, "step": 106880 }, { "epoch": 0.8628302511240444, "grad_norm": 0.9433025121688843, "learning_rate": 2.7453292731193914e-06, "loss": 2.6341428756713867, "step": 106890 }, { "epoch": 0.8629109724498115, "grad_norm": 1.0649356842041016, "learning_rate": 2.7437138033811782e-06, "loss": 2.282846450805664, "step": 106900 }, { "epoch": 0.8629916937755786, "grad_norm": 0.9584808945655823, "learning_rate": 2.742098333642965e-06, "loss": 2.25885124206543, "step": 106910 }, { "epoch": 0.8630724151013456, "grad_norm": 0.9375207424163818, "learning_rate": 2.740482863904752e-06, "loss": 2.877343940734863, "step": 106920 }, { "epoch": 0.8631531364271127, "grad_norm": 1.5684559345245361, "learning_rate": 2.7388673941665388e-06, "loss": 2.2992034912109376, "step": 106930 }, { "epoch": 0.8632338577528798, "grad_norm": 0.9308012127876282, "learning_rate": 2.7372519244283256e-06, "loss": 2.840675163269043, "step": 106940 }, { "epoch": 0.8633145790786468, "grad_norm": 0.8712736964225769, "learning_rate": 2.7356364546901125e-06, "loss": 1.9690752029418945, "step": 106950 }, { "epoch": 0.8633953004044138, "grad_norm": 1.0015027523040771, "learning_rate": 2.7340209849518993e-06, "loss": 2.3158477783203124, "step": 106960 }, { "epoch": 0.8634760217301809, "grad_norm": 0.8235952258110046, "learning_rate": 2.732405515213686e-06, "loss": 2.258017158508301, "step": 106970 }, { "epoch": 0.863556743055948, "grad_norm": 1.3777470588684082, "learning_rate": 2.730790045475473e-06, "loss": 2.4704305648803713, "step": 106980 }, { "epoch": 0.863637464381715, "grad_norm": 0.8755753040313721, "learning_rate": 2.7291745757372603e-06, "loss": 1.934911346435547, "step": 106990 }, { "epoch": 0.863718185707482, "grad_norm": 1.132537841796875, "learning_rate": 2.727559105999047e-06, "loss": 2.349695014953613, "step": 107000 }, { "epoch": 0.8637989070332491, "grad_norm": 0.8674097657203674, "learning_rate": 2.725943636260834e-06, "loss": 2.7534553527832033, "step": 107010 }, { "epoch": 0.8638796283590162, "grad_norm": 0.8985578417778015, "learning_rate": 2.724328166522621e-06, "loss": 2.370242691040039, "step": 107020 }, { "epoch": 0.8639603496847832, "grad_norm": 1.0794906616210938, "learning_rate": 2.7227126967844077e-06, "loss": 2.375589942932129, "step": 107030 }, { "epoch": 0.8640410710105503, "grad_norm": 1.120977520942688, "learning_rate": 2.7210972270461945e-06, "loss": 2.0317617416381837, "step": 107040 }, { "epoch": 0.8641217923363174, "grad_norm": 1.4139490127563477, "learning_rate": 2.7194817573079814e-06, "loss": 2.4918285369873048, "step": 107050 }, { "epoch": 0.8642025136620843, "grad_norm": 1.0496783256530762, "learning_rate": 2.717866287569768e-06, "loss": 2.3082508087158202, "step": 107060 }, { "epoch": 0.8642832349878514, "grad_norm": 2.2066292762756348, "learning_rate": 2.716250817831555e-06, "loss": 2.6322372436523436, "step": 107070 }, { "epoch": 0.8643639563136185, "grad_norm": 0.9203810691833496, "learning_rate": 2.714635348093342e-06, "loss": 1.9985387802124024, "step": 107080 }, { "epoch": 0.8644446776393856, "grad_norm": 0.6624962091445923, "learning_rate": 2.7130198783551287e-06, "loss": 2.358032989501953, "step": 107090 }, { "epoch": 0.8645253989651526, "grad_norm": 1.001870036125183, "learning_rate": 2.7114044086169156e-06, "loss": 2.6085596084594727, "step": 107100 }, { "epoch": 0.8646061202909197, "grad_norm": 0.9140334129333496, "learning_rate": 2.7097889388787024e-06, "loss": 2.176741027832031, "step": 107110 }, { "epoch": 0.8646868416166867, "grad_norm": 0.8763079047203064, "learning_rate": 2.7081734691404893e-06, "loss": 2.1505861282348633, "step": 107120 }, { "epoch": 0.8647675629424537, "grad_norm": 1.0803414583206177, "learning_rate": 2.706557999402276e-06, "loss": 1.978968620300293, "step": 107130 }, { "epoch": 0.8648482842682208, "grad_norm": 0.6187257766723633, "learning_rate": 2.7049425296640634e-06, "loss": 2.5113498687744142, "step": 107140 }, { "epoch": 0.8649290055939879, "grad_norm": 1.1897368431091309, "learning_rate": 2.7033270599258502e-06, "loss": 2.5363664627075195, "step": 107150 }, { "epoch": 0.865009726919755, "grad_norm": 0.7637626528739929, "learning_rate": 2.701711590187637e-06, "loss": 2.3618585586547853, "step": 107160 }, { "epoch": 0.865090448245522, "grad_norm": 0.74632728099823, "learning_rate": 2.7000961204494244e-06, "loss": 2.418160820007324, "step": 107170 }, { "epoch": 0.865171169571289, "grad_norm": 0.8608784675598145, "learning_rate": 2.698480650711211e-06, "loss": 2.4959978103637694, "step": 107180 }, { "epoch": 0.8652518908970561, "grad_norm": 0.7825969457626343, "learning_rate": 2.696865180972998e-06, "loss": 2.3849925994873047, "step": 107190 }, { "epoch": 0.8653326122228231, "grad_norm": 0.9908862709999084, "learning_rate": 2.695249711234785e-06, "loss": 2.560496520996094, "step": 107200 }, { "epoch": 0.8654133335485902, "grad_norm": 1.0346473455429077, "learning_rate": 2.6936342414965717e-06, "loss": 2.4148174285888673, "step": 107210 }, { "epoch": 0.8654940548743573, "grad_norm": 0.7832197546958923, "learning_rate": 2.6920187717583586e-06, "loss": 2.5936059951782227, "step": 107220 }, { "epoch": 0.8655747762001244, "grad_norm": 1.005276083946228, "learning_rate": 2.6904033020201454e-06, "loss": 2.5936044692993163, "step": 107230 }, { "epoch": 0.8656554975258913, "grad_norm": 0.7166066765785217, "learning_rate": 2.6887878322819323e-06, "loss": 2.251216506958008, "step": 107240 }, { "epoch": 0.8657362188516584, "grad_norm": 1.422767996788025, "learning_rate": 2.687172362543719e-06, "loss": 2.496113967895508, "step": 107250 }, { "epoch": 0.8658169401774255, "grad_norm": 0.8947045803070068, "learning_rate": 2.685556892805506e-06, "loss": 2.1831031799316407, "step": 107260 }, { "epoch": 0.8658976615031926, "grad_norm": 0.6259348392486572, "learning_rate": 2.683941423067293e-06, "loss": 2.658770942687988, "step": 107270 }, { "epoch": 0.8659783828289596, "grad_norm": 0.8608953952789307, "learning_rate": 2.6823259533290797e-06, "loss": 2.140621566772461, "step": 107280 }, { "epoch": 0.8660591041547266, "grad_norm": 1.1834659576416016, "learning_rate": 2.6807104835908665e-06, "loss": 2.2082778930664064, "step": 107290 }, { "epoch": 0.8661398254804937, "grad_norm": 0.746229887008667, "learning_rate": 2.6790950138526533e-06, "loss": 2.153802490234375, "step": 107300 }, { "epoch": 0.8662205468062607, "grad_norm": 0.6465238332748413, "learning_rate": 2.67747954411444e-06, "loss": 1.9085983276367187, "step": 107310 }, { "epoch": 0.8663012681320278, "grad_norm": 0.9232873320579529, "learning_rate": 2.675864074376227e-06, "loss": 2.2886390686035156, "step": 107320 }, { "epoch": 0.8663819894577949, "grad_norm": 0.9438860416412354, "learning_rate": 2.674248604638014e-06, "loss": 2.109329605102539, "step": 107330 }, { "epoch": 0.866462710783562, "grad_norm": 1.0194292068481445, "learning_rate": 2.6726331348998007e-06, "loss": 2.594239044189453, "step": 107340 }, { "epoch": 0.8665434321093289, "grad_norm": 1.0523812770843506, "learning_rate": 2.6710176651615876e-06, "loss": 2.496219253540039, "step": 107350 }, { "epoch": 0.866624153435096, "grad_norm": 0.9191582202911377, "learning_rate": 2.6694021954233744e-06, "loss": 2.2915136337280275, "step": 107360 }, { "epoch": 0.8667048747608631, "grad_norm": 0.9239192605018616, "learning_rate": 2.6677867256851613e-06, "loss": 2.4305768966674806, "step": 107370 }, { "epoch": 0.8667855960866301, "grad_norm": 0.5458263754844666, "learning_rate": 2.666171255946948e-06, "loss": 2.6349864959716798, "step": 107380 }, { "epoch": 0.8668663174123972, "grad_norm": 0.6078943014144897, "learning_rate": 2.664555786208735e-06, "loss": 2.285449409484863, "step": 107390 }, { "epoch": 0.8669470387381643, "grad_norm": 1.2142915725708008, "learning_rate": 2.662940316470522e-06, "loss": 2.408208656311035, "step": 107400 }, { "epoch": 0.8670277600639313, "grad_norm": 1.1887755393981934, "learning_rate": 2.6613248467323086e-06, "loss": 2.108407211303711, "step": 107410 }, { "epoch": 0.8671084813896983, "grad_norm": 0.6386947631835938, "learning_rate": 2.6597093769940955e-06, "loss": 2.35599308013916, "step": 107420 }, { "epoch": 0.8671892027154654, "grad_norm": 0.5851348042488098, "learning_rate": 2.6580939072558828e-06, "loss": 2.5141845703125, "step": 107430 }, { "epoch": 0.8672699240412325, "grad_norm": 0.970272421836853, "learning_rate": 2.6564784375176696e-06, "loss": 2.2785240173339845, "step": 107440 }, { "epoch": 0.8673506453669995, "grad_norm": 0.7704703211784363, "learning_rate": 2.6548629677794565e-06, "loss": 2.067245101928711, "step": 107450 }, { "epoch": 0.8674313666927665, "grad_norm": 0.8978032469749451, "learning_rate": 2.6532474980412433e-06, "loss": 2.226984405517578, "step": 107460 }, { "epoch": 0.8675120880185336, "grad_norm": 0.8674176931381226, "learning_rate": 2.65163202830303e-06, "loss": 2.3001861572265625, "step": 107470 }, { "epoch": 0.8675928093443007, "grad_norm": 1.088680624961853, "learning_rate": 2.650016558564817e-06, "loss": 2.232928466796875, "step": 107480 }, { "epoch": 0.8676735306700677, "grad_norm": 0.5860568881034851, "learning_rate": 2.648401088826604e-06, "loss": 1.9383621215820312, "step": 107490 }, { "epoch": 0.8677542519958348, "grad_norm": 0.8472632765769958, "learning_rate": 2.6467856190883907e-06, "loss": 2.325917625427246, "step": 107500 }, { "epoch": 0.8678349733216019, "grad_norm": 1.1109898090362549, "learning_rate": 2.6451701493501775e-06, "loss": 2.4597110748291016, "step": 107510 }, { "epoch": 0.8679156946473688, "grad_norm": 1.2969976663589478, "learning_rate": 2.6435546796119644e-06, "loss": 2.2968631744384767, "step": 107520 }, { "epoch": 0.8679964159731359, "grad_norm": 1.1316450834274292, "learning_rate": 2.6419392098737512e-06, "loss": 2.32143497467041, "step": 107530 }, { "epoch": 0.868077137298903, "grad_norm": 0.6430922746658325, "learning_rate": 2.640323740135538e-06, "loss": 2.008833312988281, "step": 107540 }, { "epoch": 0.8681578586246701, "grad_norm": 1.2492488622665405, "learning_rate": 2.638708270397325e-06, "loss": 2.3393592834472656, "step": 107550 }, { "epoch": 0.8682385799504371, "grad_norm": 1.2490508556365967, "learning_rate": 2.6370928006591118e-06, "loss": 2.372345733642578, "step": 107560 }, { "epoch": 0.8683193012762042, "grad_norm": 0.9841511249542236, "learning_rate": 2.6354773309208986e-06, "loss": 2.0165178298950197, "step": 107570 }, { "epoch": 0.8684000226019712, "grad_norm": 0.9273010492324829, "learning_rate": 2.6338618611826854e-06, "loss": 2.2898582458496093, "step": 107580 }, { "epoch": 0.8684807439277382, "grad_norm": 1.392473816871643, "learning_rate": 2.6322463914444723e-06, "loss": 2.315290069580078, "step": 107590 }, { "epoch": 0.8685614652535053, "grad_norm": 0.7550687193870544, "learning_rate": 2.630630921706259e-06, "loss": 2.42342586517334, "step": 107600 }, { "epoch": 0.8686421865792724, "grad_norm": 0.6590511202812195, "learning_rate": 2.629015451968046e-06, "loss": 1.991358757019043, "step": 107610 }, { "epoch": 0.8687229079050395, "grad_norm": 1.106205940246582, "learning_rate": 2.627399982229833e-06, "loss": 2.315701484680176, "step": 107620 }, { "epoch": 0.8688036292308065, "grad_norm": 0.8926602005958557, "learning_rate": 2.6257845124916197e-06, "loss": 2.4824203491210937, "step": 107630 }, { "epoch": 0.8688843505565735, "grad_norm": 0.8899965286254883, "learning_rate": 2.6241690427534065e-06, "loss": 2.570787811279297, "step": 107640 }, { "epoch": 0.8689650718823406, "grad_norm": 0.7859399914741516, "learning_rate": 2.6225535730151934e-06, "loss": 2.3655248641967774, "step": 107650 }, { "epoch": 0.8690457932081076, "grad_norm": 1.4131050109863281, "learning_rate": 2.6209381032769802e-06, "loss": 3.019658851623535, "step": 107660 }, { "epoch": 0.8691265145338747, "grad_norm": 0.8530011773109436, "learning_rate": 2.619322633538767e-06, "loss": 2.2060705184936524, "step": 107670 }, { "epoch": 0.8692072358596418, "grad_norm": 1.8598777055740356, "learning_rate": 2.6177071638005543e-06, "loss": 2.3692371368408205, "step": 107680 }, { "epoch": 0.8692879571854089, "grad_norm": 1.0598297119140625, "learning_rate": 2.616091694062341e-06, "loss": 2.4472024917602537, "step": 107690 }, { "epoch": 0.8693686785111758, "grad_norm": 1.2988585233688354, "learning_rate": 2.614476224324128e-06, "loss": 2.4877214431762695, "step": 107700 }, { "epoch": 0.8694493998369429, "grad_norm": 1.9683713912963867, "learning_rate": 2.612860754585915e-06, "loss": 2.8325572967529298, "step": 107710 }, { "epoch": 0.86953012116271, "grad_norm": 0.7422746419906616, "learning_rate": 2.6112452848477017e-06, "loss": 2.4097005844116213, "step": 107720 }, { "epoch": 0.869610842488477, "grad_norm": 1.1766515970230103, "learning_rate": 2.6096298151094886e-06, "loss": 2.052456283569336, "step": 107730 }, { "epoch": 0.8696915638142441, "grad_norm": 1.2634795904159546, "learning_rate": 2.6080143453712754e-06, "loss": 2.15417594909668, "step": 107740 }, { "epoch": 0.8697722851400111, "grad_norm": 0.6484641432762146, "learning_rate": 2.6063988756330622e-06, "loss": 2.1828603744506836, "step": 107750 }, { "epoch": 0.8698530064657782, "grad_norm": 1.0380247831344604, "learning_rate": 2.604783405894849e-06, "loss": 2.7844215393066407, "step": 107760 }, { "epoch": 0.8699337277915452, "grad_norm": 0.6984224915504456, "learning_rate": 2.6031679361566364e-06, "loss": 2.289871025085449, "step": 107770 }, { "epoch": 0.8700144491173123, "grad_norm": 1.0823549032211304, "learning_rate": 2.601552466418423e-06, "loss": 2.377110481262207, "step": 107780 }, { "epoch": 0.8700951704430794, "grad_norm": 0.7736114263534546, "learning_rate": 2.59993699668021e-06, "loss": 2.2820011138916017, "step": 107790 }, { "epoch": 0.8701758917688465, "grad_norm": 0.8614465594291687, "learning_rate": 2.598321526941997e-06, "loss": 2.420015335083008, "step": 107800 }, { "epoch": 0.8702566130946134, "grad_norm": 0.8552151322364807, "learning_rate": 2.5967060572037837e-06, "loss": 2.220347595214844, "step": 107810 }, { "epoch": 0.8703373344203805, "grad_norm": 0.6292397975921631, "learning_rate": 2.5950905874655706e-06, "loss": 2.601620101928711, "step": 107820 }, { "epoch": 0.8704180557461476, "grad_norm": 0.5025361180305481, "learning_rate": 2.5934751177273574e-06, "loss": 2.3788629531860352, "step": 107830 }, { "epoch": 0.8704987770719146, "grad_norm": 0.9377263188362122, "learning_rate": 2.5918596479891443e-06, "loss": 2.698694610595703, "step": 107840 }, { "epoch": 0.8705794983976817, "grad_norm": 0.6746633648872375, "learning_rate": 2.590244178250931e-06, "loss": 2.592670440673828, "step": 107850 }, { "epoch": 0.8706602197234488, "grad_norm": 0.5281739234924316, "learning_rate": 2.5886287085127184e-06, "loss": 2.1321537017822267, "step": 107860 }, { "epoch": 0.8707409410492158, "grad_norm": 1.571173071861267, "learning_rate": 2.5870132387745052e-06, "loss": 2.3947626113891602, "step": 107870 }, { "epoch": 0.8708216623749828, "grad_norm": 0.9029568433761597, "learning_rate": 2.585397769036292e-06, "loss": 2.5354503631591796, "step": 107880 }, { "epoch": 0.8709023837007499, "grad_norm": 0.5851708650588989, "learning_rate": 2.583782299298079e-06, "loss": 2.591052436828613, "step": 107890 }, { "epoch": 0.870983105026517, "grad_norm": 0.799102783203125, "learning_rate": 2.5821668295598658e-06, "loss": 2.1352563858032227, "step": 107900 }, { "epoch": 0.871063826352284, "grad_norm": 1.1920918226242065, "learning_rate": 2.5805513598216526e-06, "loss": 2.145524787902832, "step": 107910 }, { "epoch": 0.871144547678051, "grad_norm": 0.6908248066902161, "learning_rate": 2.5789358900834395e-06, "loss": 2.5774036407470704, "step": 107920 }, { "epoch": 0.8712252690038181, "grad_norm": 0.6466233730316162, "learning_rate": 2.5773204203452263e-06, "loss": 2.246176338195801, "step": 107930 }, { "epoch": 0.8713059903295852, "grad_norm": 1.1525923013687134, "learning_rate": 2.575704950607013e-06, "loss": 2.353286552429199, "step": 107940 }, { "epoch": 0.8713867116553522, "grad_norm": 0.8924795985221863, "learning_rate": 2.5740894808688e-06, "loss": 2.509030342102051, "step": 107950 }, { "epoch": 0.8714674329811193, "grad_norm": 1.4027941226959229, "learning_rate": 2.572474011130587e-06, "loss": 2.4875713348388673, "step": 107960 }, { "epoch": 0.8715481543068864, "grad_norm": 1.1474815607070923, "learning_rate": 2.5708585413923737e-06, "loss": 2.397287368774414, "step": 107970 }, { "epoch": 0.8716288756326533, "grad_norm": 0.7750897407531738, "learning_rate": 2.5692430716541605e-06, "loss": 2.7955657958984377, "step": 107980 }, { "epoch": 0.8717095969584204, "grad_norm": 0.752754271030426, "learning_rate": 2.5676276019159474e-06, "loss": 2.0660120010375977, "step": 107990 }, { "epoch": 0.8717903182841875, "grad_norm": 1.0764093399047852, "learning_rate": 2.5660121321777342e-06, "loss": 2.293558120727539, "step": 108000 }, { "epoch": 0.8718710396099546, "grad_norm": 0.6762679219245911, "learning_rate": 2.564396662439521e-06, "loss": 2.426170539855957, "step": 108010 }, { "epoch": 0.8719517609357216, "grad_norm": 0.7812990546226501, "learning_rate": 2.562781192701308e-06, "loss": 2.7447202682495115, "step": 108020 }, { "epoch": 0.8720324822614887, "grad_norm": 0.6861573457717896, "learning_rate": 2.5611657229630948e-06, "loss": 1.8887147903442383, "step": 108030 }, { "epoch": 0.8721132035872557, "grad_norm": 0.5669125914573669, "learning_rate": 2.5595502532248816e-06, "loss": 2.8492238998413084, "step": 108040 }, { "epoch": 0.8721939249130227, "grad_norm": 0.5700966119766235, "learning_rate": 2.5579347834866685e-06, "loss": 1.8140079498291015, "step": 108050 }, { "epoch": 0.8722746462387898, "grad_norm": 0.9478627443313599, "learning_rate": 2.5563193137484553e-06, "loss": 2.426384925842285, "step": 108060 }, { "epoch": 0.8723553675645569, "grad_norm": 1.1322240829467773, "learning_rate": 2.554703844010242e-06, "loss": 2.7243581771850587, "step": 108070 }, { "epoch": 0.872436088890324, "grad_norm": 0.9974363446235657, "learning_rate": 2.553088374272029e-06, "loss": 2.9925138473510744, "step": 108080 }, { "epoch": 0.872516810216091, "grad_norm": 1.1733088493347168, "learning_rate": 2.551472904533816e-06, "loss": 2.4683565139770507, "step": 108090 }, { "epoch": 0.872597531541858, "grad_norm": 0.9686869382858276, "learning_rate": 2.5498574347956027e-06, "loss": 2.0266550064086912, "step": 108100 }, { "epoch": 0.8726782528676251, "grad_norm": 0.9537439942359924, "learning_rate": 2.5482419650573895e-06, "loss": 2.253253173828125, "step": 108110 }, { "epoch": 0.8727589741933921, "grad_norm": 0.6469954252243042, "learning_rate": 2.546626495319177e-06, "loss": 2.1429061889648438, "step": 108120 }, { "epoch": 0.8728396955191592, "grad_norm": 0.7364075183868408, "learning_rate": 2.5450110255809637e-06, "loss": 2.295937156677246, "step": 108130 }, { "epoch": 0.8729204168449263, "grad_norm": 0.5403721332550049, "learning_rate": 2.5433955558427505e-06, "loss": 2.550658416748047, "step": 108140 }, { "epoch": 0.8730011381706934, "grad_norm": 1.1597404479980469, "learning_rate": 2.5417800861045373e-06, "loss": 2.941604423522949, "step": 108150 }, { "epoch": 0.8730818594964603, "grad_norm": 0.9278605580329895, "learning_rate": 2.540164616366324e-06, "loss": 2.773391914367676, "step": 108160 }, { "epoch": 0.8731625808222274, "grad_norm": 0.6960628628730774, "learning_rate": 2.538549146628111e-06, "loss": 2.0850830078125, "step": 108170 }, { "epoch": 0.8732433021479945, "grad_norm": 0.8708362579345703, "learning_rate": 2.536933676889898e-06, "loss": 2.4150524139404297, "step": 108180 }, { "epoch": 0.8733240234737615, "grad_norm": 0.620634913444519, "learning_rate": 2.5353182071516847e-06, "loss": 2.816672134399414, "step": 108190 }, { "epoch": 0.8734047447995286, "grad_norm": 0.7445806264877319, "learning_rate": 2.5337027374134716e-06, "loss": 2.7159799575805663, "step": 108200 }, { "epoch": 0.8734854661252957, "grad_norm": 0.651429295539856, "learning_rate": 2.5320872676752584e-06, "loss": 2.1094871520996095, "step": 108210 }, { "epoch": 0.8735661874510627, "grad_norm": 0.82504802942276, "learning_rate": 2.5304717979370453e-06, "loss": 2.3180873870849608, "step": 108220 }, { "epoch": 0.8736469087768297, "grad_norm": 1.4364829063415527, "learning_rate": 2.528856328198832e-06, "loss": 3.155441093444824, "step": 108230 }, { "epoch": 0.8737276301025968, "grad_norm": 0.8716656565666199, "learning_rate": 2.527240858460619e-06, "loss": 2.141756057739258, "step": 108240 }, { "epoch": 0.8738083514283639, "grad_norm": 2.2749743461608887, "learning_rate": 2.525625388722406e-06, "loss": 2.6610744476318358, "step": 108250 }, { "epoch": 0.873889072754131, "grad_norm": 0.9967359304428101, "learning_rate": 2.5240099189841926e-06, "loss": 2.514257049560547, "step": 108260 }, { "epoch": 0.8739697940798979, "grad_norm": 0.967782199382782, "learning_rate": 2.5223944492459795e-06, "loss": 2.9866317749023437, "step": 108270 }, { "epoch": 0.874050515405665, "grad_norm": 1.8024344444274902, "learning_rate": 2.5207789795077663e-06, "loss": 2.216231346130371, "step": 108280 }, { "epoch": 0.8741312367314321, "grad_norm": 0.7077154517173767, "learning_rate": 2.519163509769553e-06, "loss": 2.2290927886962892, "step": 108290 }, { "epoch": 0.8742119580571991, "grad_norm": 1.2163546085357666, "learning_rate": 2.51754804003134e-06, "loss": 2.4021591186523437, "step": 108300 }, { "epoch": 0.8742926793829662, "grad_norm": 0.9609933495521545, "learning_rate": 2.515932570293127e-06, "loss": 2.418049430847168, "step": 108310 }, { "epoch": 0.8743734007087333, "grad_norm": 0.5549493432044983, "learning_rate": 2.5143171005549137e-06, "loss": 2.3404767990112303, "step": 108320 }, { "epoch": 0.8744541220345003, "grad_norm": 1.4383015632629395, "learning_rate": 2.5127016308167006e-06, "loss": 2.497258949279785, "step": 108330 }, { "epoch": 0.8745348433602673, "grad_norm": 0.9107482433319092, "learning_rate": 2.5110861610784874e-06, "loss": 2.521642303466797, "step": 108340 }, { "epoch": 0.8746155646860344, "grad_norm": 1.6743711233139038, "learning_rate": 2.5094706913402743e-06, "loss": 2.370659828186035, "step": 108350 }, { "epoch": 0.8746962860118015, "grad_norm": 0.7642120718955994, "learning_rate": 2.507855221602061e-06, "loss": 2.1413551330566407, "step": 108360 }, { "epoch": 0.8747770073375685, "grad_norm": 1.1169979572296143, "learning_rate": 2.506239751863848e-06, "loss": 2.384703254699707, "step": 108370 }, { "epoch": 0.8748577286633356, "grad_norm": 0.5911742448806763, "learning_rate": 2.5046242821256352e-06, "loss": 2.190419578552246, "step": 108380 }, { "epoch": 0.8749384499891026, "grad_norm": 1.232617974281311, "learning_rate": 2.503008812387422e-06, "loss": 2.1799644470214843, "step": 108390 }, { "epoch": 0.8750191713148697, "grad_norm": 1.3491443395614624, "learning_rate": 2.5013933426492093e-06, "loss": 2.6214412689208983, "step": 108400 }, { "epoch": 0.8750998926406367, "grad_norm": 0.6818053722381592, "learning_rate": 2.4997778729109958e-06, "loss": 2.205581283569336, "step": 108410 }, { "epoch": 0.8751806139664038, "grad_norm": 0.9638053178787231, "learning_rate": 2.4981624031727826e-06, "loss": 2.0449262619018556, "step": 108420 }, { "epoch": 0.8752613352921709, "grad_norm": 1.1412756443023682, "learning_rate": 2.4965469334345694e-06, "loss": 2.036482048034668, "step": 108430 }, { "epoch": 0.8753420566179378, "grad_norm": 0.5786088109016418, "learning_rate": 2.4949314636963563e-06, "loss": 2.2459033966064452, "step": 108440 }, { "epoch": 0.8754227779437049, "grad_norm": 1.1616466045379639, "learning_rate": 2.493315993958143e-06, "loss": 2.300758934020996, "step": 108450 }, { "epoch": 0.875503499269472, "grad_norm": 0.7269076108932495, "learning_rate": 2.49170052421993e-06, "loss": 2.300358772277832, "step": 108460 }, { "epoch": 0.8755842205952391, "grad_norm": 0.9228182435035706, "learning_rate": 2.4900850544817173e-06, "loss": 2.0419301986694336, "step": 108470 }, { "epoch": 0.8756649419210061, "grad_norm": 0.46451273560523987, "learning_rate": 2.488469584743504e-06, "loss": 2.390542411804199, "step": 108480 }, { "epoch": 0.8757456632467732, "grad_norm": 1.1595369577407837, "learning_rate": 2.486854115005291e-06, "loss": 2.7698545455932617, "step": 108490 }, { "epoch": 0.8758263845725403, "grad_norm": 0.8030629754066467, "learning_rate": 2.485238645267078e-06, "loss": 2.163850021362305, "step": 108500 }, { "epoch": 0.8759071058983072, "grad_norm": 1.2284729480743408, "learning_rate": 2.4836231755288646e-06, "loss": 2.202050971984863, "step": 108510 }, { "epoch": 0.8759878272240743, "grad_norm": 0.9694015383720398, "learning_rate": 2.4820077057906515e-06, "loss": 2.6128381729125976, "step": 108520 }, { "epoch": 0.8760685485498414, "grad_norm": 1.003239393234253, "learning_rate": 2.4803922360524383e-06, "loss": 2.6091903686523437, "step": 108530 }, { "epoch": 0.8761492698756085, "grad_norm": 1.011129379272461, "learning_rate": 2.478776766314225e-06, "loss": 2.566156578063965, "step": 108540 }, { "epoch": 0.8762299912013755, "grad_norm": 1.106013536453247, "learning_rate": 2.477161296576012e-06, "loss": 2.55946044921875, "step": 108550 }, { "epoch": 0.8763107125271425, "grad_norm": 0.675396203994751, "learning_rate": 2.4755458268377993e-06, "loss": 2.1930957794189454, "step": 108560 }, { "epoch": 0.8763914338529096, "grad_norm": 0.6972605586051941, "learning_rate": 2.473930357099586e-06, "loss": 2.278862953186035, "step": 108570 }, { "epoch": 0.8764721551786766, "grad_norm": 1.3844802379608154, "learning_rate": 2.472314887361373e-06, "loss": 2.610296058654785, "step": 108580 }, { "epoch": 0.8765528765044437, "grad_norm": 0.8975048661231995, "learning_rate": 2.47069941762316e-06, "loss": 2.2041509628295897, "step": 108590 }, { "epoch": 0.8766335978302108, "grad_norm": 1.2073936462402344, "learning_rate": 2.4690839478849467e-06, "loss": 2.318246841430664, "step": 108600 }, { "epoch": 0.8767143191559779, "grad_norm": 0.8816874027252197, "learning_rate": 2.4674684781467335e-06, "loss": 2.33947696685791, "step": 108610 }, { "epoch": 0.8767950404817448, "grad_norm": 0.81031733751297, "learning_rate": 2.4658530084085204e-06, "loss": 2.197707176208496, "step": 108620 }, { "epoch": 0.8768757618075119, "grad_norm": 0.8442996144294739, "learning_rate": 2.464237538670307e-06, "loss": 2.12091121673584, "step": 108630 }, { "epoch": 0.876956483133279, "grad_norm": 0.9456098675727844, "learning_rate": 2.462622068932094e-06, "loss": 2.880420112609863, "step": 108640 }, { "epoch": 0.877037204459046, "grad_norm": 0.7533694505691528, "learning_rate": 2.461006599193881e-06, "loss": 2.40708065032959, "step": 108650 }, { "epoch": 0.8771179257848131, "grad_norm": 0.9437209963798523, "learning_rate": 2.4593911294556677e-06, "loss": 2.0841020584106444, "step": 108660 }, { "epoch": 0.8771986471105802, "grad_norm": 0.8068723678588867, "learning_rate": 2.4577756597174546e-06, "loss": 1.981929397583008, "step": 108670 }, { "epoch": 0.8772793684363472, "grad_norm": 0.6709519028663635, "learning_rate": 2.4561601899792414e-06, "loss": 2.040567398071289, "step": 108680 }, { "epoch": 0.8773600897621142, "grad_norm": 1.3386280536651611, "learning_rate": 2.4545447202410283e-06, "loss": 2.115294647216797, "step": 108690 }, { "epoch": 0.8774408110878813, "grad_norm": 1.0505037307739258, "learning_rate": 2.452929250502815e-06, "loss": 2.9026309967041017, "step": 108700 }, { "epoch": 0.8775215324136484, "grad_norm": 1.0479798316955566, "learning_rate": 2.451313780764602e-06, "loss": 2.639212417602539, "step": 108710 }, { "epoch": 0.8776022537394154, "grad_norm": 0.8521621227264404, "learning_rate": 2.449698311026389e-06, "loss": 2.0515975952148438, "step": 108720 }, { "epoch": 0.8776829750651824, "grad_norm": 1.0587936639785767, "learning_rate": 2.4480828412881757e-06, "loss": 2.2006462097167967, "step": 108730 }, { "epoch": 0.8777636963909495, "grad_norm": 0.6566925048828125, "learning_rate": 2.4464673715499625e-06, "loss": 2.4346628189086914, "step": 108740 }, { "epoch": 0.8778444177167166, "grad_norm": 1.0047739744186401, "learning_rate": 2.4448519018117494e-06, "loss": 2.380550193786621, "step": 108750 }, { "epoch": 0.8779251390424836, "grad_norm": 0.6553685665130615, "learning_rate": 2.443236432073536e-06, "loss": 2.1465126037597657, "step": 108760 }, { "epoch": 0.8780058603682507, "grad_norm": 1.2669159173965454, "learning_rate": 2.441620962335323e-06, "loss": 2.5069564819335937, "step": 108770 }, { "epoch": 0.8780865816940178, "grad_norm": 0.6689743399620056, "learning_rate": 2.44000549259711e-06, "loss": 2.5215890884399412, "step": 108780 }, { "epoch": 0.8781673030197849, "grad_norm": 1.045982003211975, "learning_rate": 2.4383900228588967e-06, "loss": 2.3384174346923827, "step": 108790 }, { "epoch": 0.8782480243455518, "grad_norm": 0.9844449162483215, "learning_rate": 2.4367745531206836e-06, "loss": 2.5126243591308595, "step": 108800 }, { "epoch": 0.8783287456713189, "grad_norm": 1.6079598665237427, "learning_rate": 2.435159083382471e-06, "loss": 1.8554914474487305, "step": 108810 }, { "epoch": 0.878409466997086, "grad_norm": 0.7378340363502502, "learning_rate": 2.4335436136442577e-06, "loss": 2.141632652282715, "step": 108820 }, { "epoch": 0.878490188322853, "grad_norm": 0.7248628735542297, "learning_rate": 2.4319281439060445e-06, "loss": 2.312108039855957, "step": 108830 }, { "epoch": 0.8785709096486201, "grad_norm": 0.9246771335601807, "learning_rate": 2.4303126741678314e-06, "loss": 2.7170827865600584, "step": 108840 }, { "epoch": 0.8786516309743871, "grad_norm": 0.8112391233444214, "learning_rate": 2.4286972044296182e-06, "loss": 2.349863052368164, "step": 108850 }, { "epoch": 0.8787323523001542, "grad_norm": 0.9884560108184814, "learning_rate": 2.427081734691405e-06, "loss": 2.5076272964477537, "step": 108860 }, { "epoch": 0.8788130736259212, "grad_norm": 0.9980230331420898, "learning_rate": 2.425466264953192e-06, "loss": 2.442374038696289, "step": 108870 }, { "epoch": 0.8788937949516883, "grad_norm": 0.8303342461585999, "learning_rate": 2.4238507952149788e-06, "loss": 2.565827560424805, "step": 108880 }, { "epoch": 0.8789745162774554, "grad_norm": 0.7482895255088806, "learning_rate": 2.4222353254767656e-06, "loss": 2.1625852584838867, "step": 108890 }, { "epoch": 0.8790552376032224, "grad_norm": 0.668056070804596, "learning_rate": 2.420619855738553e-06, "loss": 2.764640235900879, "step": 108900 }, { "epoch": 0.8791359589289894, "grad_norm": 0.7433006167411804, "learning_rate": 2.4190043860003397e-06, "loss": 2.5207050323486326, "step": 108910 }, { "epoch": 0.8792166802547565, "grad_norm": 0.5124809145927429, "learning_rate": 2.4173889162621266e-06, "loss": 2.1815160751342773, "step": 108920 }, { "epoch": 0.8792974015805236, "grad_norm": 0.9540010690689087, "learning_rate": 2.4157734465239134e-06, "loss": 2.7003973007202147, "step": 108930 }, { "epoch": 0.8793781229062906, "grad_norm": 0.7714804410934448, "learning_rate": 2.4141579767857003e-06, "loss": 2.3392316818237306, "step": 108940 }, { "epoch": 0.8794588442320577, "grad_norm": 0.9465446472167969, "learning_rate": 2.412542507047487e-06, "loss": 2.351309585571289, "step": 108950 }, { "epoch": 0.8795395655578248, "grad_norm": 0.9635792374610901, "learning_rate": 2.410927037309274e-06, "loss": 1.9968765258789063, "step": 108960 }, { "epoch": 0.8796202868835917, "grad_norm": 0.5912031531333923, "learning_rate": 2.409311567571061e-06, "loss": 2.3447185516357423, "step": 108970 }, { "epoch": 0.8797010082093588, "grad_norm": 1.2686080932617188, "learning_rate": 2.4076960978328477e-06, "loss": 2.329405403137207, "step": 108980 }, { "epoch": 0.8797817295351259, "grad_norm": 1.0868431329727173, "learning_rate": 2.4060806280946345e-06, "loss": 2.7144496917724608, "step": 108990 }, { "epoch": 0.879862450860893, "grad_norm": 0.6420977115631104, "learning_rate": 2.4044651583564213e-06, "loss": 2.515878105163574, "step": 109000 }, { "epoch": 0.87994317218666, "grad_norm": 0.7474488615989685, "learning_rate": 2.402849688618208e-06, "loss": 2.657337188720703, "step": 109010 }, { "epoch": 0.880023893512427, "grad_norm": 0.5517001152038574, "learning_rate": 2.401234218879995e-06, "loss": 2.417513847351074, "step": 109020 }, { "epoch": 0.8801046148381941, "grad_norm": 1.2081513404846191, "learning_rate": 2.399618749141782e-06, "loss": 2.272967529296875, "step": 109030 }, { "epoch": 0.8801853361639611, "grad_norm": 0.8083730936050415, "learning_rate": 2.3980032794035687e-06, "loss": 2.444799613952637, "step": 109040 }, { "epoch": 0.8802660574897282, "grad_norm": 0.8205954432487488, "learning_rate": 2.3963878096653556e-06, "loss": 2.2976688385009765, "step": 109050 }, { "epoch": 0.8803467788154953, "grad_norm": 1.1995309591293335, "learning_rate": 2.3947723399271424e-06, "loss": 2.2150760650634767, "step": 109060 }, { "epoch": 0.8804275001412624, "grad_norm": 1.2311674356460571, "learning_rate": 2.3931568701889293e-06, "loss": 1.8745807647705077, "step": 109070 }, { "epoch": 0.8805082214670293, "grad_norm": 0.8988634347915649, "learning_rate": 2.391541400450716e-06, "loss": 2.1268310546875, "step": 109080 }, { "epoch": 0.8805889427927964, "grad_norm": 0.5841549634933472, "learning_rate": 2.389925930712503e-06, "loss": 2.2489011764526365, "step": 109090 }, { "epoch": 0.8806696641185635, "grad_norm": 0.4244767129421234, "learning_rate": 2.38831046097429e-06, "loss": 1.8094831466674806, "step": 109100 }, { "epoch": 0.8807503854443305, "grad_norm": 0.8254785537719727, "learning_rate": 2.3866949912360767e-06, "loss": 2.597262954711914, "step": 109110 }, { "epoch": 0.8808311067700976, "grad_norm": 0.905619740486145, "learning_rate": 2.3850795214978635e-06, "loss": 2.6557607650756836, "step": 109120 }, { "epoch": 0.8809118280958647, "grad_norm": 0.9214736819267273, "learning_rate": 2.3834640517596503e-06, "loss": 2.0945449829101563, "step": 109130 }, { "epoch": 0.8809925494216317, "grad_norm": 1.0887378454208374, "learning_rate": 2.381848582021437e-06, "loss": 2.085409736633301, "step": 109140 }, { "epoch": 0.8810732707473987, "grad_norm": 0.8942422270774841, "learning_rate": 2.380233112283224e-06, "loss": 2.6348628997802734, "step": 109150 }, { "epoch": 0.8811539920731658, "grad_norm": 0.6097655892372131, "learning_rate": 2.3786176425450113e-06, "loss": 2.064090919494629, "step": 109160 }, { "epoch": 0.8812347133989329, "grad_norm": 0.6175292134284973, "learning_rate": 2.377002172806798e-06, "loss": 2.7537155151367188, "step": 109170 }, { "epoch": 0.8813154347246999, "grad_norm": 1.081681489944458, "learning_rate": 2.375386703068585e-06, "loss": 3.1933212280273438, "step": 109180 }, { "epoch": 0.881396156050467, "grad_norm": 1.1470006704330444, "learning_rate": 2.373771233330372e-06, "loss": 2.4533466339111327, "step": 109190 }, { "epoch": 0.881476877376234, "grad_norm": 0.956786036491394, "learning_rate": 2.3721557635921587e-06, "loss": 2.218798828125, "step": 109200 }, { "epoch": 0.8815575987020011, "grad_norm": 0.9731940031051636, "learning_rate": 2.3705402938539455e-06, "loss": 2.331728935241699, "step": 109210 }, { "epoch": 0.8816383200277681, "grad_norm": 0.9982484579086304, "learning_rate": 2.3689248241157324e-06, "loss": 2.029144859313965, "step": 109220 }, { "epoch": 0.8817190413535352, "grad_norm": 0.7322057485580444, "learning_rate": 2.3673093543775192e-06, "loss": 2.1662031173706056, "step": 109230 }, { "epoch": 0.8817997626793023, "grad_norm": 0.6813943386077881, "learning_rate": 2.365693884639306e-06, "loss": 2.1527050018310545, "step": 109240 }, { "epoch": 0.8818804840050694, "grad_norm": 0.8069011569023132, "learning_rate": 2.3640784149010933e-06, "loss": 2.6497358322143554, "step": 109250 }, { "epoch": 0.8819612053308363, "grad_norm": 0.6463908553123474, "learning_rate": 2.36246294516288e-06, "loss": 2.0087718963623047, "step": 109260 }, { "epoch": 0.8820419266566034, "grad_norm": 2.5667452812194824, "learning_rate": 2.360847475424667e-06, "loss": 2.5002113342285157, "step": 109270 }, { "epoch": 0.8821226479823705, "grad_norm": 0.7686469554901123, "learning_rate": 2.359232005686454e-06, "loss": 2.372523307800293, "step": 109280 }, { "epoch": 0.8822033693081375, "grad_norm": 0.9068716764450073, "learning_rate": 2.3576165359482407e-06, "loss": 2.483244514465332, "step": 109290 }, { "epoch": 0.8822840906339046, "grad_norm": 0.8510851263999939, "learning_rate": 2.3560010662100276e-06, "loss": 2.4435075759887694, "step": 109300 }, { "epoch": 0.8823648119596716, "grad_norm": 1.0416299104690552, "learning_rate": 2.3543855964718144e-06, "loss": 2.703864860534668, "step": 109310 }, { "epoch": 0.8824455332854387, "grad_norm": 0.49730855226516724, "learning_rate": 2.3527701267336013e-06, "loss": 2.1839824676513673, "step": 109320 }, { "epoch": 0.8825262546112057, "grad_norm": 0.8698077201843262, "learning_rate": 2.351154656995388e-06, "loss": 2.393285942077637, "step": 109330 }, { "epoch": 0.8826069759369728, "grad_norm": 0.6539815664291382, "learning_rate": 2.349539187257175e-06, "loss": 2.5245996475219727, "step": 109340 }, { "epoch": 0.8826876972627399, "grad_norm": 0.7794491648674011, "learning_rate": 2.347923717518962e-06, "loss": 2.4905866622924804, "step": 109350 }, { "epoch": 0.8827684185885069, "grad_norm": 0.474581241607666, "learning_rate": 2.3463082477807486e-06, "loss": 2.3143026351928713, "step": 109360 }, { "epoch": 0.8828491399142739, "grad_norm": 0.37336406111717224, "learning_rate": 2.3446927780425355e-06, "loss": 1.9304729461669923, "step": 109370 }, { "epoch": 0.882929861240041, "grad_norm": 1.457276463508606, "learning_rate": 2.3430773083043223e-06, "loss": 2.094335174560547, "step": 109380 }, { "epoch": 0.8830105825658081, "grad_norm": 0.5886525511741638, "learning_rate": 2.341461838566109e-06, "loss": 2.389309310913086, "step": 109390 }, { "epoch": 0.8830913038915751, "grad_norm": 1.1824506521224976, "learning_rate": 2.339846368827896e-06, "loss": 2.2190202713012694, "step": 109400 }, { "epoch": 0.8831720252173422, "grad_norm": 0.9792608022689819, "learning_rate": 2.338230899089683e-06, "loss": 2.2636043548583986, "step": 109410 }, { "epoch": 0.8832527465431093, "grad_norm": 1.12721586227417, "learning_rate": 2.3366154293514697e-06, "loss": 2.1708202362060547, "step": 109420 }, { "epoch": 0.8833334678688762, "grad_norm": 1.0278607606887817, "learning_rate": 2.3349999596132566e-06, "loss": 3.0835903167724608, "step": 109430 }, { "epoch": 0.8834141891946433, "grad_norm": 0.7141833305358887, "learning_rate": 2.3333844898750434e-06, "loss": 2.4748041152954103, "step": 109440 }, { "epoch": 0.8834949105204104, "grad_norm": 0.7053353190422058, "learning_rate": 2.3317690201368303e-06, "loss": 2.646473503112793, "step": 109450 }, { "epoch": 0.8835756318461775, "grad_norm": 0.6411795616149902, "learning_rate": 2.330153550398617e-06, "loss": 2.32617244720459, "step": 109460 }, { "epoch": 0.8836563531719445, "grad_norm": 0.5837858319282532, "learning_rate": 2.328538080660404e-06, "loss": 2.138759994506836, "step": 109470 }, { "epoch": 0.8837370744977115, "grad_norm": 1.034903883934021, "learning_rate": 2.326922610922191e-06, "loss": 2.2964529037475585, "step": 109480 }, { "epoch": 0.8838177958234786, "grad_norm": 0.8661099672317505, "learning_rate": 2.3253071411839776e-06, "loss": 2.139459991455078, "step": 109490 }, { "epoch": 0.8838985171492456, "grad_norm": 1.0743768215179443, "learning_rate": 2.323691671445765e-06, "loss": 2.5555347442626952, "step": 109500 }, { "epoch": 0.8839792384750127, "grad_norm": 1.0759812593460083, "learning_rate": 2.3220762017075517e-06, "loss": 2.566901206970215, "step": 109510 }, { "epoch": 0.8840599598007798, "grad_norm": 0.8459766507148743, "learning_rate": 2.3204607319693386e-06, "loss": 2.2442434310913084, "step": 109520 }, { "epoch": 0.8841406811265469, "grad_norm": 0.8611572980880737, "learning_rate": 2.3188452622311254e-06, "loss": 2.327630043029785, "step": 109530 }, { "epoch": 0.8842214024523138, "grad_norm": 1.2137444019317627, "learning_rate": 2.3172297924929123e-06, "loss": 2.805167388916016, "step": 109540 }, { "epoch": 0.8843021237780809, "grad_norm": 0.7336142659187317, "learning_rate": 2.315614322754699e-06, "loss": 2.812450408935547, "step": 109550 }, { "epoch": 0.884382845103848, "grad_norm": 2.149704694747925, "learning_rate": 2.313998853016486e-06, "loss": 2.6250160217285154, "step": 109560 }, { "epoch": 0.884463566429615, "grad_norm": 1.2301270961761475, "learning_rate": 2.312383383278273e-06, "loss": 2.3404537200927735, "step": 109570 }, { "epoch": 0.8845442877553821, "grad_norm": 1.8622748851776123, "learning_rate": 2.3107679135400597e-06, "loss": 2.870931625366211, "step": 109580 }, { "epoch": 0.8846250090811492, "grad_norm": 1.1118214130401611, "learning_rate": 2.3091524438018465e-06, "loss": 2.5273298263549804, "step": 109590 }, { "epoch": 0.8847057304069162, "grad_norm": 0.513160228729248, "learning_rate": 2.3075369740636338e-06, "loss": 2.531645965576172, "step": 109600 }, { "epoch": 0.8847864517326832, "grad_norm": 0.7559786438941956, "learning_rate": 2.3059215043254206e-06, "loss": 2.7512903213500977, "step": 109610 }, { "epoch": 0.8848671730584503, "grad_norm": 0.8179296851158142, "learning_rate": 2.3043060345872075e-06, "loss": 2.0296859741210938, "step": 109620 }, { "epoch": 0.8849478943842174, "grad_norm": 0.8254170417785645, "learning_rate": 2.3026905648489943e-06, "loss": 2.494198799133301, "step": 109630 }, { "epoch": 0.8850286157099844, "grad_norm": 0.5194411277770996, "learning_rate": 2.301075095110781e-06, "loss": 2.4782052993774415, "step": 109640 }, { "epoch": 0.8851093370357515, "grad_norm": 0.7864798307418823, "learning_rate": 2.299459625372568e-06, "loss": 2.169459915161133, "step": 109650 }, { "epoch": 0.8851900583615185, "grad_norm": 1.1433899402618408, "learning_rate": 2.297844155634355e-06, "loss": 2.7419692993164064, "step": 109660 }, { "epoch": 0.8852707796872856, "grad_norm": 1.446674108505249, "learning_rate": 2.2962286858961417e-06, "loss": 2.3399471282958983, "step": 109670 }, { "epoch": 0.8853515010130526, "grad_norm": 1.001923680305481, "learning_rate": 2.2946132161579285e-06, "loss": 2.0835687637329103, "step": 109680 }, { "epoch": 0.8854322223388197, "grad_norm": 0.9227404594421387, "learning_rate": 2.2929977464197154e-06, "loss": 2.4385726928710936, "step": 109690 }, { "epoch": 0.8855129436645868, "grad_norm": 1.3242177963256836, "learning_rate": 2.2913822766815022e-06, "loss": 2.4334953308105467, "step": 109700 }, { "epoch": 0.8855936649903537, "grad_norm": 0.5001887083053589, "learning_rate": 2.289766806943289e-06, "loss": 1.735776138305664, "step": 109710 }, { "epoch": 0.8856743863161208, "grad_norm": 0.4862627387046814, "learning_rate": 2.288151337205076e-06, "loss": 2.4609348297119142, "step": 109720 }, { "epoch": 0.8857551076418879, "grad_norm": 0.8460686802864075, "learning_rate": 2.2865358674668628e-06, "loss": 2.4963052749633787, "step": 109730 }, { "epoch": 0.885835828967655, "grad_norm": 0.8320173025131226, "learning_rate": 2.2849203977286496e-06, "loss": 2.298099708557129, "step": 109740 }, { "epoch": 0.885916550293422, "grad_norm": 0.9021255373954773, "learning_rate": 2.2833049279904365e-06, "loss": 2.3522693634033205, "step": 109750 }, { "epoch": 0.8859972716191891, "grad_norm": 0.7772514224052429, "learning_rate": 2.2816894582522233e-06, "loss": 1.8988813400268554, "step": 109760 }, { "epoch": 0.8860779929449561, "grad_norm": 0.8310779333114624, "learning_rate": 2.28007398851401e-06, "loss": 2.3696279525756836, "step": 109770 }, { "epoch": 0.8861587142707232, "grad_norm": 1.01242196559906, "learning_rate": 2.278458518775797e-06, "loss": 2.5793407440185545, "step": 109780 }, { "epoch": 0.8862394355964902, "grad_norm": 0.7722537517547607, "learning_rate": 2.276843049037584e-06, "loss": 2.2678853988647463, "step": 109790 }, { "epoch": 0.8863201569222573, "grad_norm": 0.6797842979431152, "learning_rate": 2.2752275792993707e-06, "loss": 2.309144401550293, "step": 109800 }, { "epoch": 0.8864008782480244, "grad_norm": 1.1215685606002808, "learning_rate": 2.273612109561158e-06, "loss": 2.3886077880859373, "step": 109810 }, { "epoch": 0.8864815995737914, "grad_norm": 0.7305083870887756, "learning_rate": 2.271996639822945e-06, "loss": 2.4355131149291993, "step": 109820 }, { "epoch": 0.8865623208995584, "grad_norm": 0.7411819696426392, "learning_rate": 2.2703811700847317e-06, "loss": 2.4488456726074217, "step": 109830 }, { "epoch": 0.8866430422253255, "grad_norm": 0.7237833738327026, "learning_rate": 2.2687657003465185e-06, "loss": 2.868345260620117, "step": 109840 }, { "epoch": 0.8867237635510926, "grad_norm": 0.8256844878196716, "learning_rate": 2.2671502306083053e-06, "loss": 2.4629596710205077, "step": 109850 }, { "epoch": 0.8868044848768596, "grad_norm": 0.907667875289917, "learning_rate": 2.265534760870092e-06, "loss": 2.0805810928344726, "step": 109860 }, { "epoch": 0.8868852062026267, "grad_norm": 2.2932095527648926, "learning_rate": 2.263919291131879e-06, "loss": 2.6875518798828124, "step": 109870 }, { "epoch": 0.8869659275283938, "grad_norm": 0.8326172232627869, "learning_rate": 2.262303821393666e-06, "loss": 2.51793212890625, "step": 109880 }, { "epoch": 0.8870466488541607, "grad_norm": 0.7459913492202759, "learning_rate": 2.2606883516554527e-06, "loss": 1.9565650939941406, "step": 109890 }, { "epoch": 0.8871273701799278, "grad_norm": 0.5471991896629333, "learning_rate": 2.2590728819172396e-06, "loss": 2.6651098251342775, "step": 109900 }, { "epoch": 0.8872080915056949, "grad_norm": 0.6840020418167114, "learning_rate": 2.2574574121790264e-06, "loss": 2.6854610443115234, "step": 109910 }, { "epoch": 0.887288812831462, "grad_norm": 0.8285743594169617, "learning_rate": 2.2558419424408133e-06, "loss": 2.433422660827637, "step": 109920 }, { "epoch": 0.887369534157229, "grad_norm": 1.200976014137268, "learning_rate": 2.2542264727026e-06, "loss": 2.3316158294677733, "step": 109930 }, { "epoch": 0.887450255482996, "grad_norm": 0.9822965860366821, "learning_rate": 2.252611002964387e-06, "loss": 2.226770782470703, "step": 109940 }, { "epoch": 0.8875309768087631, "grad_norm": 0.8754703402519226, "learning_rate": 2.2509955332261742e-06, "loss": 1.6997407913208007, "step": 109950 }, { "epoch": 0.8876116981345301, "grad_norm": 0.9616922736167908, "learning_rate": 2.249380063487961e-06, "loss": 2.3887285232543944, "step": 109960 }, { "epoch": 0.8876924194602972, "grad_norm": 1.2427668571472168, "learning_rate": 2.247764593749748e-06, "loss": 2.6769065856933594, "step": 109970 }, { "epoch": 0.8877731407860643, "grad_norm": 1.142898440361023, "learning_rate": 2.2461491240115348e-06, "loss": 2.2726797103881835, "step": 109980 }, { "epoch": 0.8878538621118314, "grad_norm": 0.7382035255432129, "learning_rate": 2.2445336542733216e-06, "loss": 2.344025802612305, "step": 109990 }, { "epoch": 0.8879345834375983, "grad_norm": 1.3556321859359741, "learning_rate": 2.2429181845351085e-06, "loss": 2.23426513671875, "step": 110000 }, { "epoch": 0.8880153047633654, "grad_norm": 0.6160505414009094, "learning_rate": 2.2413027147968953e-06, "loss": 1.8501909255981446, "step": 110010 }, { "epoch": 0.8880960260891325, "grad_norm": 1.0591540336608887, "learning_rate": 2.239687245058682e-06, "loss": 2.5442834854125977, "step": 110020 }, { "epoch": 0.8881767474148995, "grad_norm": 0.9725099802017212, "learning_rate": 2.238071775320469e-06, "loss": 2.1208641052246096, "step": 110030 }, { "epoch": 0.8882574687406666, "grad_norm": 0.39272916316986084, "learning_rate": 2.236456305582256e-06, "loss": 2.190287971496582, "step": 110040 }, { "epoch": 0.8883381900664337, "grad_norm": 0.6769676804542542, "learning_rate": 2.2348408358440427e-06, "loss": 2.346369743347168, "step": 110050 }, { "epoch": 0.8884189113922007, "grad_norm": 0.7016764879226685, "learning_rate": 2.2332253661058295e-06, "loss": 2.438618469238281, "step": 110060 }, { "epoch": 0.8884996327179677, "grad_norm": 0.9662745594978333, "learning_rate": 2.2316098963676164e-06, "loss": 2.383137512207031, "step": 110070 }, { "epoch": 0.8885803540437348, "grad_norm": 0.7359046339988708, "learning_rate": 2.2299944266294032e-06, "loss": 2.3975128173828124, "step": 110080 }, { "epoch": 0.8886610753695019, "grad_norm": 0.713485062122345, "learning_rate": 2.22837895689119e-06, "loss": 2.485713768005371, "step": 110090 }, { "epoch": 0.8887417966952689, "grad_norm": 1.1053322553634644, "learning_rate": 2.226763487152977e-06, "loss": 2.058085823059082, "step": 110100 }, { "epoch": 0.888822518021036, "grad_norm": 0.8681057691574097, "learning_rate": 2.2251480174147638e-06, "loss": 2.6330583572387694, "step": 110110 }, { "epoch": 0.888903239346803, "grad_norm": 0.9855957627296448, "learning_rate": 2.223532547676551e-06, "loss": 2.5025495529174804, "step": 110120 }, { "epoch": 0.8889839606725701, "grad_norm": 0.4803568422794342, "learning_rate": 2.221917077938338e-06, "loss": 2.644181251525879, "step": 110130 }, { "epoch": 0.8890646819983371, "grad_norm": 1.2573989629745483, "learning_rate": 2.2203016082001247e-06, "loss": 2.311777877807617, "step": 110140 }, { "epoch": 0.8891454033241042, "grad_norm": 0.7366666793823242, "learning_rate": 2.2186861384619116e-06, "loss": 2.004953384399414, "step": 110150 }, { "epoch": 0.8892261246498713, "grad_norm": 1.6708283424377441, "learning_rate": 2.2170706687236984e-06, "loss": 2.6041849136352537, "step": 110160 }, { "epoch": 0.8893068459756382, "grad_norm": 1.0659793615341187, "learning_rate": 2.2154551989854853e-06, "loss": 2.2314308166503904, "step": 110170 }, { "epoch": 0.8893875673014053, "grad_norm": 0.7579656839370728, "learning_rate": 2.213839729247272e-06, "loss": 2.1517494201660154, "step": 110180 }, { "epoch": 0.8894682886271724, "grad_norm": 0.8837307691574097, "learning_rate": 2.212224259509059e-06, "loss": 2.2903907775878904, "step": 110190 }, { "epoch": 0.8895490099529395, "grad_norm": 0.7944055199623108, "learning_rate": 2.210608789770846e-06, "loss": 2.3762628555297853, "step": 110200 }, { "epoch": 0.8896297312787065, "grad_norm": 0.6555182337760925, "learning_rate": 2.2089933200326326e-06, "loss": 1.9543563842773437, "step": 110210 }, { "epoch": 0.8897104526044736, "grad_norm": 1.0576035976409912, "learning_rate": 2.2073778502944195e-06, "loss": 3.0585039138793944, "step": 110220 }, { "epoch": 0.8897911739302407, "grad_norm": 0.9677538871765137, "learning_rate": 2.2057623805562063e-06, "loss": 1.96661319732666, "step": 110230 }, { "epoch": 0.8898718952560077, "grad_norm": 1.5456236600875854, "learning_rate": 2.204146910817993e-06, "loss": 2.963161849975586, "step": 110240 }, { "epoch": 0.8899526165817747, "grad_norm": 1.6656781435012817, "learning_rate": 2.20253144107978e-06, "loss": 2.695332717895508, "step": 110250 }, { "epoch": 0.8900333379075418, "grad_norm": 0.6977900266647339, "learning_rate": 2.200915971341567e-06, "loss": 2.739178466796875, "step": 110260 }, { "epoch": 0.8901140592333089, "grad_norm": 0.9892831444740295, "learning_rate": 2.1993005016033537e-06, "loss": 2.080337142944336, "step": 110270 }, { "epoch": 0.8901947805590759, "grad_norm": 1.3251701593399048, "learning_rate": 2.1976850318651406e-06, "loss": 2.2662094116210936, "step": 110280 }, { "epoch": 0.890275501884843, "grad_norm": 0.6917227506637573, "learning_rate": 2.196069562126928e-06, "loss": 2.230863571166992, "step": 110290 }, { "epoch": 0.89035622321061, "grad_norm": 0.9053910970687866, "learning_rate": 2.1944540923887147e-06, "loss": 2.449778175354004, "step": 110300 }, { "epoch": 0.8904369445363771, "grad_norm": 1.22115957736969, "learning_rate": 2.1928386226505015e-06, "loss": 2.399099922180176, "step": 110310 }, { "epoch": 0.8905176658621441, "grad_norm": 0.9113457798957825, "learning_rate": 2.1912231529122884e-06, "loss": 2.3792741775512694, "step": 110320 }, { "epoch": 0.8905983871879112, "grad_norm": 1.0455358028411865, "learning_rate": 2.1896076831740752e-06, "loss": 2.3932384490966796, "step": 110330 }, { "epoch": 0.8906791085136783, "grad_norm": 1.031715989112854, "learning_rate": 2.187992213435862e-06, "loss": 2.564092254638672, "step": 110340 }, { "epoch": 0.8907598298394452, "grad_norm": 0.8087921142578125, "learning_rate": 2.186376743697649e-06, "loss": 2.815073585510254, "step": 110350 }, { "epoch": 0.8908405511652123, "grad_norm": 0.5039992928504944, "learning_rate": 2.1847612739594358e-06, "loss": 2.3325414657592773, "step": 110360 }, { "epoch": 0.8909212724909794, "grad_norm": 1.5396881103515625, "learning_rate": 2.1831458042212226e-06, "loss": 2.6487598419189453, "step": 110370 }, { "epoch": 0.8910019938167465, "grad_norm": 1.1974564790725708, "learning_rate": 2.1815303344830094e-06, "loss": 2.7736860275268556, "step": 110380 }, { "epoch": 0.8910827151425135, "grad_norm": 1.0797216892242432, "learning_rate": 2.1799148647447963e-06, "loss": 2.114995765686035, "step": 110390 }, { "epoch": 0.8911634364682806, "grad_norm": 0.9353268146514893, "learning_rate": 2.178299395006583e-06, "loss": 2.588511085510254, "step": 110400 }, { "epoch": 0.8912441577940476, "grad_norm": 1.0310280323028564, "learning_rate": 2.17668392526837e-06, "loss": 2.197827911376953, "step": 110410 }, { "epoch": 0.8913248791198146, "grad_norm": 1.0798816680908203, "learning_rate": 2.175068455530157e-06, "loss": 2.584192657470703, "step": 110420 }, { "epoch": 0.8914056004455817, "grad_norm": 1.7353827953338623, "learning_rate": 2.1734529857919437e-06, "loss": 2.4454397201538085, "step": 110430 }, { "epoch": 0.8914863217713488, "grad_norm": 1.0085185766220093, "learning_rate": 2.171837516053731e-06, "loss": 2.120771026611328, "step": 110440 }, { "epoch": 0.8915670430971159, "grad_norm": 0.8283659815788269, "learning_rate": 2.1702220463155178e-06, "loss": 2.198764991760254, "step": 110450 }, { "epoch": 0.8916477644228828, "grad_norm": 1.0766513347625732, "learning_rate": 2.1686065765773046e-06, "loss": 2.69457950592041, "step": 110460 }, { "epoch": 0.8917284857486499, "grad_norm": 0.9632318019866943, "learning_rate": 2.1669911068390915e-06, "loss": 2.2354595184326174, "step": 110470 }, { "epoch": 0.891809207074417, "grad_norm": 1.133914589881897, "learning_rate": 2.1653756371008783e-06, "loss": 2.1541038513183595, "step": 110480 }, { "epoch": 0.891889928400184, "grad_norm": 0.7807389497756958, "learning_rate": 2.163760167362665e-06, "loss": 2.550180435180664, "step": 110490 }, { "epoch": 0.8919706497259511, "grad_norm": 0.7946780920028687, "learning_rate": 2.162144697624452e-06, "loss": 2.477615165710449, "step": 110500 }, { "epoch": 0.8920513710517182, "grad_norm": 1.3149417638778687, "learning_rate": 2.160529227886239e-06, "loss": 2.3059959411621094, "step": 110510 }, { "epoch": 0.8921320923774853, "grad_norm": 0.7645613551139832, "learning_rate": 2.1589137581480257e-06, "loss": 2.763409233093262, "step": 110520 }, { "epoch": 0.8922128137032522, "grad_norm": 1.2620415687561035, "learning_rate": 2.1572982884098126e-06, "loss": 2.536410903930664, "step": 110530 }, { "epoch": 0.8922935350290193, "grad_norm": 0.9239177703857422, "learning_rate": 2.1556828186715994e-06, "loss": 2.6203033447265627, "step": 110540 }, { "epoch": 0.8923742563547864, "grad_norm": 0.8451796770095825, "learning_rate": 2.1540673489333862e-06, "loss": 2.5571868896484373, "step": 110550 }, { "epoch": 0.8924549776805534, "grad_norm": 0.6847488880157471, "learning_rate": 2.152451879195173e-06, "loss": 2.5001163482666016, "step": 110560 }, { "epoch": 0.8925356990063205, "grad_norm": 1.3333109617233276, "learning_rate": 2.15083640945696e-06, "loss": 2.2661447525024414, "step": 110570 }, { "epoch": 0.8926164203320875, "grad_norm": 1.1902104616165161, "learning_rate": 2.1492209397187468e-06, "loss": 2.461262512207031, "step": 110580 }, { "epoch": 0.8926971416578546, "grad_norm": 0.8159187436103821, "learning_rate": 2.1476054699805336e-06, "loss": 2.1898027420043946, "step": 110590 }, { "epoch": 0.8927778629836216, "grad_norm": 0.983241081237793, "learning_rate": 2.1459900002423205e-06, "loss": 2.2715415954589844, "step": 110600 }, { "epoch": 0.8928585843093887, "grad_norm": 1.7252622842788696, "learning_rate": 2.1443745305041073e-06, "loss": 2.1859504699707033, "step": 110610 }, { "epoch": 0.8929393056351558, "grad_norm": 0.6080725193023682, "learning_rate": 2.142759060765894e-06, "loss": 2.15460319519043, "step": 110620 }, { "epoch": 0.8930200269609228, "grad_norm": 0.8481811881065369, "learning_rate": 2.141143591027681e-06, "loss": 2.3538848876953127, "step": 110630 }, { "epoch": 0.8931007482866898, "grad_norm": 0.5546727776527405, "learning_rate": 2.1395281212894683e-06, "loss": 2.7356386184692383, "step": 110640 }, { "epoch": 0.8931814696124569, "grad_norm": 1.1393102407455444, "learning_rate": 2.137912651551255e-06, "loss": 2.445005989074707, "step": 110650 }, { "epoch": 0.893262190938224, "grad_norm": 0.5784816145896912, "learning_rate": 2.136297181813042e-06, "loss": 1.9365667343139648, "step": 110660 }, { "epoch": 0.893342912263991, "grad_norm": 0.8532673120498657, "learning_rate": 2.134681712074829e-06, "loss": 2.193806838989258, "step": 110670 }, { "epoch": 0.8934236335897581, "grad_norm": 0.8592265844345093, "learning_rate": 2.1330662423366157e-06, "loss": 2.8184757232666016, "step": 110680 }, { "epoch": 0.8935043549155252, "grad_norm": 1.0786547660827637, "learning_rate": 2.1314507725984025e-06, "loss": 2.8467853546142576, "step": 110690 }, { "epoch": 0.8935850762412922, "grad_norm": 1.0206027030944824, "learning_rate": 2.1298353028601894e-06, "loss": 2.451957893371582, "step": 110700 }, { "epoch": 0.8936657975670592, "grad_norm": 1.0265185832977295, "learning_rate": 2.128219833121976e-06, "loss": 2.0642675399780273, "step": 110710 }, { "epoch": 0.8937465188928263, "grad_norm": 1.1801748275756836, "learning_rate": 2.126604363383763e-06, "loss": 2.5800365447998046, "step": 110720 }, { "epoch": 0.8938272402185934, "grad_norm": 0.8855971097946167, "learning_rate": 2.12498889364555e-06, "loss": 2.1762332916259766, "step": 110730 }, { "epoch": 0.8939079615443604, "grad_norm": 0.9419064521789551, "learning_rate": 2.1233734239073367e-06, "loss": 2.0809871673583986, "step": 110740 }, { "epoch": 0.8939886828701274, "grad_norm": 0.708475649356842, "learning_rate": 2.121757954169124e-06, "loss": 2.8227649688720704, "step": 110750 }, { "epoch": 0.8940694041958945, "grad_norm": 1.3869818449020386, "learning_rate": 2.120142484430911e-06, "loss": 2.502008628845215, "step": 110760 }, { "epoch": 0.8941501255216616, "grad_norm": 1.6612228155136108, "learning_rate": 2.1185270146926977e-06, "loss": 2.510972595214844, "step": 110770 }, { "epoch": 0.8942308468474286, "grad_norm": 0.6949499249458313, "learning_rate": 2.1169115449544845e-06, "loss": 2.4260095596313476, "step": 110780 }, { "epoch": 0.8943115681731957, "grad_norm": 0.9072438478469849, "learning_rate": 2.1152960752162714e-06, "loss": 2.3605560302734374, "step": 110790 }, { "epoch": 0.8943922894989628, "grad_norm": 1.1343973875045776, "learning_rate": 2.1136806054780582e-06, "loss": 1.9878995895385743, "step": 110800 }, { "epoch": 0.8944730108247297, "grad_norm": 0.8319181203842163, "learning_rate": 2.112065135739845e-06, "loss": 2.104914093017578, "step": 110810 }, { "epoch": 0.8945537321504968, "grad_norm": 0.7538936734199524, "learning_rate": 2.110449666001632e-06, "loss": 2.2549047470092773, "step": 110820 }, { "epoch": 0.8946344534762639, "grad_norm": 0.9367977976799011, "learning_rate": 2.1088341962634188e-06, "loss": 2.6851261138916014, "step": 110830 }, { "epoch": 0.894715174802031, "grad_norm": 0.5537506341934204, "learning_rate": 2.1072187265252056e-06, "loss": 2.362227439880371, "step": 110840 }, { "epoch": 0.894795896127798, "grad_norm": 0.8802411556243896, "learning_rate": 2.1056032567869925e-06, "loss": 2.1830238342285155, "step": 110850 }, { "epoch": 0.8948766174535651, "grad_norm": 1.214233160018921, "learning_rate": 2.1039877870487793e-06, "loss": 2.751304817199707, "step": 110860 }, { "epoch": 0.8949573387793321, "grad_norm": 1.1537624597549438, "learning_rate": 2.102372317310566e-06, "loss": 2.2582685470581056, "step": 110870 }, { "epoch": 0.8950380601050991, "grad_norm": 0.8115109205245972, "learning_rate": 2.100756847572353e-06, "loss": 2.5513845443725587, "step": 110880 }, { "epoch": 0.8951187814308662, "grad_norm": 0.6957923173904419, "learning_rate": 2.09914137783414e-06, "loss": 2.2706577301025392, "step": 110890 }, { "epoch": 0.8951995027566333, "grad_norm": 0.6066310405731201, "learning_rate": 2.0975259080959267e-06, "loss": 2.1437807083129883, "step": 110900 }, { "epoch": 0.8952802240824004, "grad_norm": 0.6402443647384644, "learning_rate": 2.0959104383577135e-06, "loss": 2.1420171737670897, "step": 110910 }, { "epoch": 0.8953609454081674, "grad_norm": 0.7481517195701599, "learning_rate": 2.0942949686195004e-06, "loss": 2.5589218139648438, "step": 110920 }, { "epoch": 0.8954416667339344, "grad_norm": 1.5711654424667358, "learning_rate": 2.0926794988812872e-06, "loss": 2.0209287643432616, "step": 110930 }, { "epoch": 0.8955223880597015, "grad_norm": 0.9186103940010071, "learning_rate": 2.091064029143074e-06, "loss": 2.5423694610595704, "step": 110940 }, { "epoch": 0.8956031093854685, "grad_norm": 0.7104706168174744, "learning_rate": 2.089448559404861e-06, "loss": 2.306105041503906, "step": 110950 }, { "epoch": 0.8956838307112356, "grad_norm": 0.45650729537010193, "learning_rate": 2.0878330896666478e-06, "loss": 2.474382209777832, "step": 110960 }, { "epoch": 0.8957645520370027, "grad_norm": 1.0432443618774414, "learning_rate": 2.0862176199284346e-06, "loss": 2.3162424087524416, "step": 110970 }, { "epoch": 0.8958452733627698, "grad_norm": 0.7578866481781006, "learning_rate": 2.0846021501902215e-06, "loss": 1.8338571548461915, "step": 110980 }, { "epoch": 0.8959259946885367, "grad_norm": 1.2795792818069458, "learning_rate": 2.0829866804520087e-06, "loss": 2.2274770736694336, "step": 110990 }, { "epoch": 0.8960067160143038, "grad_norm": 0.6985892653465271, "learning_rate": 2.0813712107137956e-06, "loss": 2.4241989135742186, "step": 111000 }, { "epoch": 0.8960874373400709, "grad_norm": 0.8981932401657104, "learning_rate": 2.0797557409755824e-06, "loss": 2.170152473449707, "step": 111010 }, { "epoch": 0.8961681586658379, "grad_norm": 0.3252522051334381, "learning_rate": 2.0781402712373693e-06, "loss": 2.197001075744629, "step": 111020 }, { "epoch": 0.896248879991605, "grad_norm": 0.5512551665306091, "learning_rate": 2.076524801499156e-06, "loss": 1.8527324676513672, "step": 111030 }, { "epoch": 0.896329601317372, "grad_norm": 0.6988356709480286, "learning_rate": 2.074909331760943e-06, "loss": 2.548252487182617, "step": 111040 }, { "epoch": 0.8964103226431391, "grad_norm": 0.6490814089775085, "learning_rate": 2.07329386202273e-06, "loss": 2.733251762390137, "step": 111050 }, { "epoch": 0.8964910439689061, "grad_norm": 0.9804689884185791, "learning_rate": 2.0716783922845166e-06, "loss": 2.107676887512207, "step": 111060 }, { "epoch": 0.8965717652946732, "grad_norm": 0.5951857566833496, "learning_rate": 2.0700629225463035e-06, "loss": 2.3697111129760744, "step": 111070 }, { "epoch": 0.8966524866204403, "grad_norm": 0.8166252374649048, "learning_rate": 2.0684474528080908e-06, "loss": 2.3878271102905275, "step": 111080 }, { "epoch": 0.8967332079462073, "grad_norm": 1.199557900428772, "learning_rate": 2.0668319830698776e-06, "loss": 2.092255973815918, "step": 111090 }, { "epoch": 0.8968139292719743, "grad_norm": 0.6661637425422668, "learning_rate": 2.0652165133316644e-06, "loss": 3.0112897872924806, "step": 111100 }, { "epoch": 0.8968946505977414, "grad_norm": 0.7989974021911621, "learning_rate": 2.0636010435934513e-06, "loss": 2.2903432846069336, "step": 111110 }, { "epoch": 0.8969753719235085, "grad_norm": 0.4471665322780609, "learning_rate": 2.061985573855238e-06, "loss": 2.281926918029785, "step": 111120 }, { "epoch": 0.8970560932492755, "grad_norm": 0.8363310098648071, "learning_rate": 2.060370104117025e-06, "loss": 2.2667974472045898, "step": 111130 }, { "epoch": 0.8971368145750426, "grad_norm": 1.0937316417694092, "learning_rate": 2.058754634378812e-06, "loss": 2.5130176544189453, "step": 111140 }, { "epoch": 0.8972175359008097, "grad_norm": 0.447838693857193, "learning_rate": 2.0571391646405987e-06, "loss": 2.1546609878540037, "step": 111150 }, { "epoch": 0.8972982572265766, "grad_norm": 0.949202835559845, "learning_rate": 2.0555236949023855e-06, "loss": 2.53981990814209, "step": 111160 }, { "epoch": 0.8973789785523437, "grad_norm": 0.7143539786338806, "learning_rate": 2.0539082251641724e-06, "loss": 2.2263010025024412, "step": 111170 }, { "epoch": 0.8974596998781108, "grad_norm": 1.04228675365448, "learning_rate": 2.0522927554259592e-06, "loss": 2.151746940612793, "step": 111180 }, { "epoch": 0.8975404212038779, "grad_norm": 0.7897167205810547, "learning_rate": 2.050677285687746e-06, "loss": 2.736839485168457, "step": 111190 }, { "epoch": 0.8976211425296449, "grad_norm": 0.7541084885597229, "learning_rate": 2.049061815949533e-06, "loss": 2.653793716430664, "step": 111200 }, { "epoch": 0.897701863855412, "grad_norm": 1.3509931564331055, "learning_rate": 2.0474463462113198e-06, "loss": 2.2449758529663084, "step": 111210 }, { "epoch": 0.897782585181179, "grad_norm": 1.3815767765045166, "learning_rate": 2.0458308764731066e-06, "loss": 2.273764801025391, "step": 111220 }, { "epoch": 0.8978633065069461, "grad_norm": 1.3034110069274902, "learning_rate": 2.0442154067348934e-06, "loss": 2.535780334472656, "step": 111230 }, { "epoch": 0.8979440278327131, "grad_norm": 0.7652978301048279, "learning_rate": 2.0425999369966803e-06, "loss": 2.0094953536987306, "step": 111240 }, { "epoch": 0.8980247491584802, "grad_norm": 0.7492444515228271, "learning_rate": 2.040984467258467e-06, "loss": 2.085692596435547, "step": 111250 }, { "epoch": 0.8981054704842473, "grad_norm": 0.512654185295105, "learning_rate": 2.039368997520254e-06, "loss": 2.1721031188964846, "step": 111260 }, { "epoch": 0.8981861918100142, "grad_norm": 1.337213158607483, "learning_rate": 2.037753527782041e-06, "loss": 2.4642757415771483, "step": 111270 }, { "epoch": 0.8982669131357813, "grad_norm": 0.6338157057762146, "learning_rate": 2.0361380580438277e-06, "loss": 2.5396953582763673, "step": 111280 }, { "epoch": 0.8983476344615484, "grad_norm": 0.9331312775611877, "learning_rate": 2.0345225883056145e-06, "loss": 2.439609909057617, "step": 111290 }, { "epoch": 0.8984283557873155, "grad_norm": 0.9699342250823975, "learning_rate": 2.0329071185674014e-06, "loss": 2.651129722595215, "step": 111300 }, { "epoch": 0.8985090771130825, "grad_norm": 1.0040358304977417, "learning_rate": 2.031291648829188e-06, "loss": 2.306382942199707, "step": 111310 }, { "epoch": 0.8985897984388496, "grad_norm": 0.6778640151023865, "learning_rate": 2.029676179090975e-06, "loss": 2.2444652557373046, "step": 111320 }, { "epoch": 0.8986705197646166, "grad_norm": 0.9206951856613159, "learning_rate": 2.0280607093527623e-06, "loss": 2.3788730621337892, "step": 111330 }, { "epoch": 0.8987512410903836, "grad_norm": 0.5306938290596008, "learning_rate": 2.026445239614549e-06, "loss": 2.355160713195801, "step": 111340 }, { "epoch": 0.8988319624161507, "grad_norm": 1.3021047115325928, "learning_rate": 2.024829769876336e-06, "loss": 2.122114562988281, "step": 111350 }, { "epoch": 0.8989126837419178, "grad_norm": 0.9154484272003174, "learning_rate": 2.023214300138123e-06, "loss": 2.8817068099975587, "step": 111360 }, { "epoch": 0.8989934050676849, "grad_norm": 0.6100298762321472, "learning_rate": 2.0215988303999097e-06, "loss": 2.3933990478515623, "step": 111370 }, { "epoch": 0.8990741263934519, "grad_norm": 0.8289446234703064, "learning_rate": 2.0199833606616966e-06, "loss": 2.4726572036743164, "step": 111380 }, { "epoch": 0.8991548477192189, "grad_norm": 0.9902905821800232, "learning_rate": 2.0183678909234834e-06, "loss": 2.6608409881591797, "step": 111390 }, { "epoch": 0.899235569044986, "grad_norm": 0.6367990970611572, "learning_rate": 2.0167524211852702e-06, "loss": 2.7469820022583007, "step": 111400 }, { "epoch": 0.899316290370753, "grad_norm": 1.4758166074752808, "learning_rate": 2.015136951447057e-06, "loss": 2.4260583877563477, "step": 111410 }, { "epoch": 0.8993970116965201, "grad_norm": 0.8646080493927002, "learning_rate": 2.0135214817088444e-06, "loss": 2.429148864746094, "step": 111420 }, { "epoch": 0.8994777330222872, "grad_norm": 0.6559770703315735, "learning_rate": 2.011906011970631e-06, "loss": 2.8200010299682616, "step": 111430 }, { "epoch": 0.8995584543480543, "grad_norm": 0.5543314218521118, "learning_rate": 2.010290542232418e-06, "loss": 2.4446897506713867, "step": 111440 }, { "epoch": 0.8996391756738212, "grad_norm": 0.7922340035438538, "learning_rate": 2.008675072494205e-06, "loss": 2.5040910720825194, "step": 111450 }, { "epoch": 0.8997198969995883, "grad_norm": 1.0343097448349, "learning_rate": 2.0070596027559917e-06, "loss": 2.727303886413574, "step": 111460 }, { "epoch": 0.8998006183253554, "grad_norm": 0.7641758322715759, "learning_rate": 2.0054441330177786e-06, "loss": 1.9431718826293944, "step": 111470 }, { "epoch": 0.8998813396511224, "grad_norm": 0.7257822155952454, "learning_rate": 2.0038286632795654e-06, "loss": 2.077435302734375, "step": 111480 }, { "epoch": 0.8999620609768895, "grad_norm": 0.4333302974700928, "learning_rate": 2.0022131935413523e-06, "loss": 1.8748979568481445, "step": 111490 }, { "epoch": 0.9000427823026566, "grad_norm": 0.4594617784023285, "learning_rate": 2.000597723803139e-06, "loss": 2.5464900970458983, "step": 111500 }, { "epoch": 0.9001235036284236, "grad_norm": 0.9981868863105774, "learning_rate": 1.998982254064926e-06, "loss": 2.16304988861084, "step": 111510 }, { "epoch": 0.9002042249541906, "grad_norm": 0.7477139830589294, "learning_rate": 1.997366784326713e-06, "loss": 2.712343215942383, "step": 111520 }, { "epoch": 0.9002849462799577, "grad_norm": 1.4373434782028198, "learning_rate": 1.9957513145884997e-06, "loss": 3.0840839385986327, "step": 111530 }, { "epoch": 0.9003656676057248, "grad_norm": 1.5008267164230347, "learning_rate": 1.9941358448502865e-06, "loss": 2.640938949584961, "step": 111540 }, { "epoch": 0.9004463889314918, "grad_norm": 1.1996605396270752, "learning_rate": 1.9925203751120734e-06, "loss": 2.44363956451416, "step": 111550 }, { "epoch": 0.9005271102572588, "grad_norm": 0.9760622382164001, "learning_rate": 1.99090490537386e-06, "loss": 2.0267152786254883, "step": 111560 }, { "epoch": 0.9006078315830259, "grad_norm": 0.7235788702964783, "learning_rate": 1.989289435635647e-06, "loss": 2.520689582824707, "step": 111570 }, { "epoch": 0.900688552908793, "grad_norm": 0.9481707215309143, "learning_rate": 1.987673965897434e-06, "loss": 2.3134954452514647, "step": 111580 }, { "epoch": 0.90076927423456, "grad_norm": 0.654761016368866, "learning_rate": 1.9860584961592207e-06, "loss": 2.475216293334961, "step": 111590 }, { "epoch": 0.9008499955603271, "grad_norm": 0.9164109230041504, "learning_rate": 1.9844430264210076e-06, "loss": 2.232559013366699, "step": 111600 }, { "epoch": 0.9009307168860942, "grad_norm": 0.9497491717338562, "learning_rate": 1.9828275566827944e-06, "loss": 2.23909969329834, "step": 111610 }, { "epoch": 0.9010114382118611, "grad_norm": 1.2407630681991577, "learning_rate": 1.9812120869445813e-06, "loss": 2.614274597167969, "step": 111620 }, { "epoch": 0.9010921595376282, "grad_norm": 1.6341885328292847, "learning_rate": 1.979596617206368e-06, "loss": 2.030861663818359, "step": 111630 }, { "epoch": 0.9011728808633953, "grad_norm": 0.7157053351402283, "learning_rate": 1.977981147468155e-06, "loss": 2.234770584106445, "step": 111640 }, { "epoch": 0.9012536021891624, "grad_norm": 0.7205122113227844, "learning_rate": 1.976365677729942e-06, "loss": 2.4135629653930666, "step": 111650 }, { "epoch": 0.9013343235149294, "grad_norm": 0.7696064710617065, "learning_rate": 1.9747502079917287e-06, "loss": 2.6316158294677736, "step": 111660 }, { "epoch": 0.9014150448406965, "grad_norm": 1.302871584892273, "learning_rate": 1.9731347382535155e-06, "loss": 2.3900413513183594, "step": 111670 }, { "epoch": 0.9014957661664635, "grad_norm": 1.6759529113769531, "learning_rate": 1.9715192685153028e-06, "loss": 2.0903114318847655, "step": 111680 }, { "epoch": 0.9015764874922306, "grad_norm": 0.5370669960975647, "learning_rate": 1.9699037987770896e-06, "loss": 2.414030075073242, "step": 111690 }, { "epoch": 0.9016572088179976, "grad_norm": 0.8788663148880005, "learning_rate": 1.9682883290388765e-06, "loss": 2.0804052352905273, "step": 111700 }, { "epoch": 0.9017379301437647, "grad_norm": 0.8483129143714905, "learning_rate": 1.9666728593006633e-06, "loss": 2.636593246459961, "step": 111710 }, { "epoch": 0.9018186514695318, "grad_norm": 1.5365409851074219, "learning_rate": 1.96505738956245e-06, "loss": 2.6398086547851562, "step": 111720 }, { "epoch": 0.9018993727952987, "grad_norm": 0.5227527022361755, "learning_rate": 1.963441919824237e-06, "loss": 1.8892551422119142, "step": 111730 }, { "epoch": 0.9019800941210658, "grad_norm": 0.689998984336853, "learning_rate": 1.961826450086024e-06, "loss": 2.164752197265625, "step": 111740 }, { "epoch": 0.9020608154468329, "grad_norm": 1.395579218864441, "learning_rate": 1.9602109803478107e-06, "loss": 2.6281753540039063, "step": 111750 }, { "epoch": 0.9021415367726, "grad_norm": 1.1645548343658447, "learning_rate": 1.9585955106095975e-06, "loss": 2.1822296142578126, "step": 111760 }, { "epoch": 0.902222258098367, "grad_norm": 0.7708448767662048, "learning_rate": 1.956980040871385e-06, "loss": 2.027307891845703, "step": 111770 }, { "epoch": 0.9023029794241341, "grad_norm": 1.0915676355361938, "learning_rate": 1.9553645711331717e-06, "loss": 2.826316261291504, "step": 111780 }, { "epoch": 0.9023837007499012, "grad_norm": 0.4778444766998291, "learning_rate": 1.9537491013949585e-06, "loss": 2.356594276428223, "step": 111790 }, { "epoch": 0.9024644220756681, "grad_norm": 1.1741453409194946, "learning_rate": 1.9521336316567453e-06, "loss": 2.512959671020508, "step": 111800 }, { "epoch": 0.9025451434014352, "grad_norm": 1.3142123222351074, "learning_rate": 1.950518161918532e-06, "loss": 2.566959571838379, "step": 111810 }, { "epoch": 0.9026258647272023, "grad_norm": 0.7970810532569885, "learning_rate": 1.948902692180319e-06, "loss": 2.6534534454345704, "step": 111820 }, { "epoch": 0.9027065860529694, "grad_norm": 1.0388171672821045, "learning_rate": 1.947287222442106e-06, "loss": 2.2617681503295897, "step": 111830 }, { "epoch": 0.9027873073787364, "grad_norm": 0.8310197591781616, "learning_rate": 1.9456717527038927e-06, "loss": 2.4387224197387694, "step": 111840 }, { "epoch": 0.9028680287045034, "grad_norm": 1.019547700881958, "learning_rate": 1.9440562829656796e-06, "loss": 2.5608137130737303, "step": 111850 }, { "epoch": 0.9029487500302705, "grad_norm": 0.6855390071868896, "learning_rate": 1.9424408132274664e-06, "loss": 2.503080940246582, "step": 111860 }, { "epoch": 0.9030294713560375, "grad_norm": 0.8231856822967529, "learning_rate": 1.9408253434892533e-06, "loss": 2.7121253967285157, "step": 111870 }, { "epoch": 0.9031101926818046, "grad_norm": 1.725598692893982, "learning_rate": 1.93920987375104e-06, "loss": 2.4144596099853515, "step": 111880 }, { "epoch": 0.9031909140075717, "grad_norm": 1.2121223211288452, "learning_rate": 1.937594404012827e-06, "loss": 2.2488712310791015, "step": 111890 }, { "epoch": 0.9032716353333388, "grad_norm": 0.8161919713020325, "learning_rate": 1.935978934274614e-06, "loss": 2.555302619934082, "step": 111900 }, { "epoch": 0.9033523566591057, "grad_norm": 0.7164114713668823, "learning_rate": 1.9343634645364006e-06, "loss": 2.6075490951538085, "step": 111910 }, { "epoch": 0.9034330779848728, "grad_norm": 0.8541378378868103, "learning_rate": 1.9327479947981875e-06, "loss": 2.449032020568848, "step": 111920 }, { "epoch": 0.9035137993106399, "grad_norm": 1.2692891359329224, "learning_rate": 1.9311325250599743e-06, "loss": 2.2935159683227537, "step": 111930 }, { "epoch": 0.9035945206364069, "grad_norm": 0.7748680710792542, "learning_rate": 1.929517055321761e-06, "loss": 2.4667613983154295, "step": 111940 }, { "epoch": 0.903675241962174, "grad_norm": 3.078813076019287, "learning_rate": 1.927901585583548e-06, "loss": 2.7355182647705076, "step": 111950 }, { "epoch": 0.903755963287941, "grad_norm": 0.9262275099754333, "learning_rate": 1.926286115845335e-06, "loss": 2.1170513153076174, "step": 111960 }, { "epoch": 0.9038366846137081, "grad_norm": 1.4092013835906982, "learning_rate": 1.9246706461071217e-06, "loss": 2.097559356689453, "step": 111970 }, { "epoch": 0.9039174059394751, "grad_norm": 1.276792049407959, "learning_rate": 1.9230551763689086e-06, "loss": 2.046840858459473, "step": 111980 }, { "epoch": 0.9039981272652422, "grad_norm": 0.673994243144989, "learning_rate": 1.9214397066306954e-06, "loss": 2.279387855529785, "step": 111990 }, { "epoch": 0.9040788485910093, "grad_norm": 0.8444498777389526, "learning_rate": 1.9198242368924827e-06, "loss": 2.4749013900756838, "step": 112000 }, { "epoch": 0.9041595699167763, "grad_norm": 1.1595903635025024, "learning_rate": 1.9182087671542695e-06, "loss": 2.149002456665039, "step": 112010 }, { "epoch": 0.9042402912425433, "grad_norm": 1.2874913215637207, "learning_rate": 1.9165932974160564e-06, "loss": 2.5628215789794924, "step": 112020 }, { "epoch": 0.9043210125683104, "grad_norm": 0.8380564451217651, "learning_rate": 1.9149778276778432e-06, "loss": 2.1798961639404295, "step": 112030 }, { "epoch": 0.9044017338940775, "grad_norm": 0.9193915724754333, "learning_rate": 1.91336235793963e-06, "loss": 2.5882015228271484, "step": 112040 }, { "epoch": 0.9044824552198445, "grad_norm": 1.2409919500350952, "learning_rate": 1.911746888201417e-06, "loss": 2.2539928436279295, "step": 112050 }, { "epoch": 0.9045631765456116, "grad_norm": 0.9604059457778931, "learning_rate": 1.9101314184632038e-06, "loss": 2.24332275390625, "step": 112060 }, { "epoch": 0.9046438978713787, "grad_norm": 1.396696925163269, "learning_rate": 1.9085159487249906e-06, "loss": 2.125807189941406, "step": 112070 }, { "epoch": 0.9047246191971456, "grad_norm": 0.7284603714942932, "learning_rate": 1.9069004789867777e-06, "loss": 2.495855712890625, "step": 112080 }, { "epoch": 0.9048053405229127, "grad_norm": 0.6374467015266418, "learning_rate": 1.9052850092485645e-06, "loss": 2.709006500244141, "step": 112090 }, { "epoch": 0.9048860618486798, "grad_norm": 1.0935516357421875, "learning_rate": 1.9036695395103513e-06, "loss": 2.411526107788086, "step": 112100 }, { "epoch": 0.9049667831744469, "grad_norm": 0.7786495685577393, "learning_rate": 1.9020540697721382e-06, "loss": 2.4023414611816407, "step": 112110 }, { "epoch": 0.9050475045002139, "grad_norm": 0.6470749974250793, "learning_rate": 1.900438600033925e-06, "loss": 2.095307159423828, "step": 112120 }, { "epoch": 0.905128225825981, "grad_norm": 0.9370019435882568, "learning_rate": 1.8988231302957119e-06, "loss": 3.1795352935791015, "step": 112130 }, { "epoch": 0.905208947151748, "grad_norm": 0.6285709142684937, "learning_rate": 1.8972076605574987e-06, "loss": 2.0627010345458983, "step": 112140 }, { "epoch": 0.905289668477515, "grad_norm": 0.8434863090515137, "learning_rate": 1.8955921908192856e-06, "loss": 2.331295394897461, "step": 112150 }, { "epoch": 0.9053703898032821, "grad_norm": 0.8819018006324768, "learning_rate": 1.8939767210810724e-06, "loss": 2.0520858764648438, "step": 112160 }, { "epoch": 0.9054511111290492, "grad_norm": 0.9283183813095093, "learning_rate": 1.8923612513428593e-06, "loss": 2.6862747192382814, "step": 112170 }, { "epoch": 0.9055318324548163, "grad_norm": 1.5627880096435547, "learning_rate": 1.8907457816046463e-06, "loss": 2.832997703552246, "step": 112180 }, { "epoch": 0.9056125537805833, "grad_norm": 1.2019829750061035, "learning_rate": 1.8891303118664332e-06, "loss": 2.055528450012207, "step": 112190 }, { "epoch": 0.9056932751063503, "grad_norm": 0.73929762840271, "learning_rate": 1.88751484212822e-06, "loss": 2.6458391189575194, "step": 112200 }, { "epoch": 0.9057739964321174, "grad_norm": 1.0475491285324097, "learning_rate": 1.8858993723900069e-06, "loss": 2.131020355224609, "step": 112210 }, { "epoch": 0.9058547177578845, "grad_norm": 1.1391184329986572, "learning_rate": 1.8842839026517937e-06, "loss": 2.2843040466308593, "step": 112220 }, { "epoch": 0.9059354390836515, "grad_norm": 3.588955879211426, "learning_rate": 1.8826684329135806e-06, "loss": 3.257762145996094, "step": 112230 }, { "epoch": 0.9060161604094186, "grad_norm": 1.0717763900756836, "learning_rate": 1.8810529631753674e-06, "loss": 2.376246452331543, "step": 112240 }, { "epoch": 0.9060968817351857, "grad_norm": 0.8360853791236877, "learning_rate": 1.8794374934371542e-06, "loss": 2.4021142959594726, "step": 112250 }, { "epoch": 0.9061776030609526, "grad_norm": 0.6755949854850769, "learning_rate": 1.877822023698941e-06, "loss": 2.4002946853637694, "step": 112260 }, { "epoch": 0.9062583243867197, "grad_norm": 0.8027828931808472, "learning_rate": 1.876206553960728e-06, "loss": 2.206587219238281, "step": 112270 }, { "epoch": 0.9063390457124868, "grad_norm": 1.6691386699676514, "learning_rate": 1.8745910842225148e-06, "loss": 1.8620847702026366, "step": 112280 }, { "epoch": 0.9064197670382539, "grad_norm": 0.8729342818260193, "learning_rate": 1.8729756144843016e-06, "loss": 2.352840232849121, "step": 112290 }, { "epoch": 0.9065004883640209, "grad_norm": 1.3769283294677734, "learning_rate": 1.8713601447460885e-06, "loss": 2.6682056427001952, "step": 112300 }, { "epoch": 0.906581209689788, "grad_norm": 0.87813800573349, "learning_rate": 1.8697446750078755e-06, "loss": 2.371337127685547, "step": 112310 }, { "epoch": 0.906661931015555, "grad_norm": 0.8523724675178528, "learning_rate": 1.8681292052696626e-06, "loss": 1.963821029663086, "step": 112320 }, { "epoch": 0.906742652341322, "grad_norm": 0.6937603950500488, "learning_rate": 1.8665137355314494e-06, "loss": 2.9286659240722654, "step": 112330 }, { "epoch": 0.9068233736670891, "grad_norm": 1.1099283695220947, "learning_rate": 1.8648982657932363e-06, "loss": 2.7429738998413087, "step": 112340 }, { "epoch": 0.9069040949928562, "grad_norm": 0.4859519302845001, "learning_rate": 1.8632827960550231e-06, "loss": 2.4953067779541014, "step": 112350 }, { "epoch": 0.9069848163186233, "grad_norm": 0.435395747423172, "learning_rate": 1.86166732631681e-06, "loss": 2.5915481567382814, "step": 112360 }, { "epoch": 0.9070655376443902, "grad_norm": 0.88787841796875, "learning_rate": 1.8600518565785968e-06, "loss": 2.175193977355957, "step": 112370 }, { "epoch": 0.9071462589701573, "grad_norm": 1.0329324007034302, "learning_rate": 1.8584363868403837e-06, "loss": 2.233819580078125, "step": 112380 }, { "epoch": 0.9072269802959244, "grad_norm": 1.036882996559143, "learning_rate": 1.8568209171021705e-06, "loss": 2.273166275024414, "step": 112390 }, { "epoch": 0.9073077016216914, "grad_norm": 0.8925992846488953, "learning_rate": 1.8552054473639576e-06, "loss": 2.588114929199219, "step": 112400 }, { "epoch": 0.9073884229474585, "grad_norm": 3.317476987838745, "learning_rate": 1.8535899776257444e-06, "loss": 2.300921821594238, "step": 112410 }, { "epoch": 0.9074691442732256, "grad_norm": 3.275791645050049, "learning_rate": 1.8519745078875313e-06, "loss": 2.6029733657836913, "step": 112420 }, { "epoch": 0.9075498655989926, "grad_norm": 4.84155797958374, "learning_rate": 1.850359038149318e-06, "loss": 2.713192176818848, "step": 112430 }, { "epoch": 0.9076305869247596, "grad_norm": 4.0380425453186035, "learning_rate": 1.848743568411105e-06, "loss": 2.0315656661987305, "step": 112440 }, { "epoch": 0.9077113082505267, "grad_norm": 4.2716851234436035, "learning_rate": 1.8471280986728918e-06, "loss": 2.6327796936035157, "step": 112450 }, { "epoch": 0.9077920295762938, "grad_norm": 4.271800518035889, "learning_rate": 1.8455126289346786e-06, "loss": 2.3241594314575194, "step": 112460 }, { "epoch": 0.9078727509020608, "grad_norm": 3.8794236183166504, "learning_rate": 1.8438971591964655e-06, "loss": 2.4173906326293944, "step": 112470 }, { "epoch": 0.9079534722278279, "grad_norm": 3.9601688385009766, "learning_rate": 1.8422816894582523e-06, "loss": 1.8000314712524415, "step": 112480 }, { "epoch": 0.9080341935535949, "grad_norm": 5.450936794281006, "learning_rate": 1.8406662197200392e-06, "loss": 2.6125724792480467, "step": 112490 }, { "epoch": 0.908114914879362, "grad_norm": 4.0478620529174805, "learning_rate": 1.839050749981826e-06, "loss": 2.017961883544922, "step": 112500 }, { "epoch": 0.908195636205129, "grad_norm": 5.312716960906982, "learning_rate": 1.8374352802436129e-06, "loss": 2.0888656616210937, "step": 112510 }, { "epoch": 0.9082763575308961, "grad_norm": 2.779529333114624, "learning_rate": 1.8358198105053997e-06, "loss": 2.956567573547363, "step": 112520 }, { "epoch": 0.9083570788566632, "grad_norm": 3.072636604309082, "learning_rate": 1.8342043407671868e-06, "loss": 2.3130083084106445, "step": 112530 }, { "epoch": 0.9084378001824301, "grad_norm": 2.9988038539886475, "learning_rate": 1.8325888710289736e-06, "loss": 2.535810089111328, "step": 112540 }, { "epoch": 0.9085185215081972, "grad_norm": 3.7573964595794678, "learning_rate": 1.8309734012907605e-06, "loss": 2.1176218032836913, "step": 112550 }, { "epoch": 0.9085992428339643, "grad_norm": 2.6899187564849854, "learning_rate": 1.8293579315525473e-06, "loss": 2.0371307373046874, "step": 112560 }, { "epoch": 0.9086799641597314, "grad_norm": 5.093461513519287, "learning_rate": 1.8277424618143342e-06, "loss": 2.2766042709350587, "step": 112570 }, { "epoch": 0.9087606854854984, "grad_norm": 3.123720645904541, "learning_rate": 1.826126992076121e-06, "loss": 2.247960090637207, "step": 112580 }, { "epoch": 0.9088414068112655, "grad_norm": 3.8514785766601562, "learning_rate": 1.8245115223379078e-06, "loss": 2.4371875762939452, "step": 112590 }, { "epoch": 0.9089221281370325, "grad_norm": 2.4041800498962402, "learning_rate": 1.8228960525996947e-06, "loss": 1.8029308319091797, "step": 112600 }, { "epoch": 0.9090028494627995, "grad_norm": 6.383950233459473, "learning_rate": 1.8212805828614815e-06, "loss": 2.7758031845092774, "step": 112610 }, { "epoch": 0.9090835707885666, "grad_norm": 5.434876441955566, "learning_rate": 1.8196651131232684e-06, "loss": 2.436362075805664, "step": 112620 }, { "epoch": 0.9091642921143337, "grad_norm": 5.175786972045898, "learning_rate": 1.8180496433850557e-06, "loss": 2.3591373443603514, "step": 112630 }, { "epoch": 0.9092450134401008, "grad_norm": 3.2609105110168457, "learning_rate": 1.8164341736468425e-06, "loss": 2.400312042236328, "step": 112640 }, { "epoch": 0.9093257347658678, "grad_norm": 3.629779100418091, "learning_rate": 1.8148187039086293e-06, "loss": 2.005093574523926, "step": 112650 }, { "epoch": 0.9094064560916348, "grad_norm": 4.593919277191162, "learning_rate": 1.8132032341704162e-06, "loss": 2.6899667739868165, "step": 112660 }, { "epoch": 0.9094871774174019, "grad_norm": 2.840864896774292, "learning_rate": 1.811587764432203e-06, "loss": 2.498974418640137, "step": 112670 }, { "epoch": 0.909567898743169, "grad_norm": 2.9846420288085938, "learning_rate": 1.8099722946939899e-06, "loss": 1.9873451232910155, "step": 112680 }, { "epoch": 0.909648620068936, "grad_norm": 5.701085567474365, "learning_rate": 1.8083568249557767e-06, "loss": 2.2501834869384765, "step": 112690 }, { "epoch": 0.9097293413947031, "grad_norm": 6.323845863342285, "learning_rate": 1.8067413552175636e-06, "loss": 2.2142049789428713, "step": 112700 }, { "epoch": 0.9114244892358112, "grad_norm": 4.009427547454834, "learning_rate": 1.7728164907150878e-06, "loss": 2.6353126525878907, "step": 112910 }, { "epoch": 0.9115052105615783, "grad_norm": 3.81833815574646, "learning_rate": 1.7712010209768746e-06, "loss": 2.176779556274414, "step": 112920 }, { "epoch": 0.9115859318873453, "grad_norm": 4.7738165855407715, "learning_rate": 1.7695855512386614e-06, "loss": 2.3398937225341796, "step": 112930 }, { "epoch": 0.9116666532131124, "grad_norm": 4.800845146179199, "learning_rate": 1.7679700815004485e-06, "loss": 2.2756746292114256, "step": 112940 }, { "epoch": 0.9117473745388794, "grad_norm": 2.7706563472747803, "learning_rate": 1.7663546117622353e-06, "loss": 2.8183412551879883, "step": 112950 }, { "epoch": 0.9118280958646465, "grad_norm": 3.7999489307403564, "learning_rate": 1.7647391420240222e-06, "loss": 2.2925041198730467, "step": 112960 }, { "epoch": 0.9119088171904135, "grad_norm": 5.848774433135986, "learning_rate": 1.7631236722858093e-06, "loss": 2.9146453857421877, "step": 112970 }, { "epoch": 0.9119895385161806, "grad_norm": 2.8783986568450928, "learning_rate": 1.761508202547596e-06, "loss": 2.6151508331298827, "step": 112980 }, { "epoch": 0.9120702598419477, "grad_norm": 4.4196014404296875, "learning_rate": 1.759892732809383e-06, "loss": 2.5477668762207033, "step": 112990 }, { "epoch": 0.9121509811677146, "grad_norm": 2.9150023460388184, "learning_rate": 1.7582772630711698e-06, "loss": 2.245960998535156, "step": 113000 }, { "epoch": 0.9122317024934817, "grad_norm": 3.0846762657165527, "learning_rate": 1.7566617933329566e-06, "loss": 2.437294769287109, "step": 113010 }, { "epoch": 0.9123124238192488, "grad_norm": 3.2480103969573975, "learning_rate": 1.7550463235947435e-06, "loss": 3.007705497741699, "step": 113020 }, { "epoch": 0.9123931451450159, "grad_norm": 2.9333488941192627, "learning_rate": 1.7534308538565303e-06, "loss": 2.353177070617676, "step": 113030 }, { "epoch": 0.9124738664707829, "grad_norm": 2.3866958618164062, "learning_rate": 1.7518153841183172e-06, "loss": 2.139505958557129, "step": 113040 }, { "epoch": 0.91255458779655, "grad_norm": 2.9332897663116455, "learning_rate": 1.750199914380104e-06, "loss": 2.139109420776367, "step": 113050 }, { "epoch": 0.912635309122317, "grad_norm": 4.531446933746338, "learning_rate": 1.7485844446418909e-06, "loss": 2.5525419235229494, "step": 113060 }, { "epoch": 0.912716030448084, "grad_norm": 4.872720241546631, "learning_rate": 1.7469689749036777e-06, "loss": 2.407103729248047, "step": 113070 }, { "epoch": 0.9127967517738511, "grad_norm": 3.8791213035583496, "learning_rate": 1.7453535051654646e-06, "loss": 1.992324447631836, "step": 113080 }, { "epoch": 0.9128774730996182, "grad_norm": 1.4895497560501099, "learning_rate": 1.7437380354272516e-06, "loss": 1.9327199935913086, "step": 113090 }, { "epoch": 0.9129581944253853, "grad_norm": 2.502680778503418, "learning_rate": 1.7421225656890385e-06, "loss": 2.3703136444091797, "step": 113100 }, { "epoch": 0.9130389157511523, "grad_norm": 2.3880834579467773, "learning_rate": 1.7405070959508253e-06, "loss": 2.604702186584473, "step": 113110 }, { "epoch": 0.9131196370769193, "grad_norm": 6.929556846618652, "learning_rate": 1.7388916262126121e-06, "loss": 2.3130620956420898, "step": 113120 }, { "epoch": 0.9132003584026864, "grad_norm": 5.17997932434082, "learning_rate": 1.737276156474399e-06, "loss": 2.2923864364624023, "step": 113130 }, { "epoch": 0.9132810797284535, "grad_norm": 3.1837704181671143, "learning_rate": 1.7356606867361858e-06, "loss": 2.3960180282592773, "step": 113140 }, { "epoch": 0.9133618010542205, "grad_norm": 4.17946195602417, "learning_rate": 1.7340452169979727e-06, "loss": 2.772833824157715, "step": 113150 }, { "epoch": 0.9134425223799876, "grad_norm": 3.3111071586608887, "learning_rate": 1.7324297472597595e-06, "loss": 2.61212043762207, "step": 113160 }, { "epoch": 0.9135232437057547, "grad_norm": 4.799348831176758, "learning_rate": 1.7308142775215464e-06, "loss": 2.393562889099121, "step": 113170 }, { "epoch": 0.9136039650315216, "grad_norm": 3.747016429901123, "learning_rate": 1.7291988077833332e-06, "loss": 2.8631114959716797, "step": 113180 }, { "epoch": 0.9136846863572887, "grad_norm": 3.2917234897613525, "learning_rate": 1.72758333804512e-06, "loss": 2.4571693420410154, "step": 113190 }, { "epoch": 0.9137654076830558, "grad_norm": 31.98797035217285, "learning_rate": 1.725967868306907e-06, "loss": 2.52315673828125, "step": 113200 }, { "epoch": 0.9138461290088229, "grad_norm": 3.809725761413574, "learning_rate": 1.7243523985686938e-06, "loss": 2.102252388000488, "step": 113210 }, { "epoch": 0.9139268503345899, "grad_norm": 4.2530059814453125, "learning_rate": 1.7227369288304808e-06, "loss": 2.4299510955810546, "step": 113220 }, { "epoch": 0.914007571660357, "grad_norm": 7.176113605499268, "learning_rate": 1.7211214590922677e-06, "loss": 2.607317352294922, "step": 113230 }, { "epoch": 0.914088292986124, "grad_norm": 5.162712574005127, "learning_rate": 1.7195059893540545e-06, "loss": 2.164511489868164, "step": 113240 }, { "epoch": 0.914169014311891, "grad_norm": 4.258705139160156, "learning_rate": 1.7178905196158414e-06, "loss": 2.717365264892578, "step": 113250 }, { "epoch": 0.9142497356376581, "grad_norm": 3.3481638431549072, "learning_rate": 1.7162750498776284e-06, "loss": 2.4295604705810545, "step": 113260 }, { "epoch": 0.9143304569634252, "grad_norm": 5.046895503997803, "learning_rate": 1.7146595801394153e-06, "loss": 2.756007766723633, "step": 113270 }, { "epoch": 0.9144111782891923, "grad_norm": 5.557920932769775, "learning_rate": 1.713044110401202e-06, "loss": 2.3708045959472654, "step": 113280 }, { "epoch": 0.9144918996149592, "grad_norm": 3.2367680072784424, "learning_rate": 1.711428640662989e-06, "loss": 2.405434417724609, "step": 113290 }, { "epoch": 0.9145726209407263, "grad_norm": 4.9919915199279785, "learning_rate": 1.7098131709247758e-06, "loss": 2.406523895263672, "step": 113300 }, { "epoch": 0.9146533422664934, "grad_norm": 2.451411247253418, "learning_rate": 1.7081977011865629e-06, "loss": 2.3272409439086914, "step": 113310 }, { "epoch": 0.9147340635922604, "grad_norm": 4.6336750984191895, "learning_rate": 1.7065822314483497e-06, "loss": 2.5512441635131835, "step": 113320 }, { "epoch": 0.9148147849180275, "grad_norm": 4.444884777069092, "learning_rate": 1.7049667617101365e-06, "loss": 3.030942916870117, "step": 113330 }, { "epoch": 0.9148955062437946, "grad_norm": 3.505701780319214, "learning_rate": 1.7033512919719234e-06, "loss": 2.2763696670532227, "step": 113340 }, { "epoch": 0.9149762275695617, "grad_norm": 3.591568946838379, "learning_rate": 1.7017358222337102e-06, "loss": 2.795633316040039, "step": 113350 }, { "epoch": 0.9150569488953286, "grad_norm": 4.5390448570251465, "learning_rate": 1.700120352495497e-06, "loss": 2.3159774780273437, "step": 113360 }, { "epoch": 0.9151376702210957, "grad_norm": 2.710252046585083, "learning_rate": 1.698504882757284e-06, "loss": 2.4277177810668946, "step": 113370 }, { "epoch": 0.9152183915468628, "grad_norm": 4.221561908721924, "learning_rate": 1.6968894130190708e-06, "loss": 2.853860092163086, "step": 113380 }, { "epoch": 0.9152991128726298, "grad_norm": 3.172832489013672, "learning_rate": 1.6952739432808576e-06, "loss": 2.860782814025879, "step": 113390 }, { "epoch": 0.9153798341983969, "grad_norm": 8.78459644317627, "learning_rate": 1.6936584735426445e-06, "loss": 3.3359954833984373, "step": 113400 }, { "epoch": 0.9154605555241639, "grad_norm": 4.305883407592773, "learning_rate": 1.6920430038044313e-06, "loss": 2.0894132614135743, "step": 113410 }, { "epoch": 0.915541276849931, "grad_norm": 2.906264066696167, "learning_rate": 1.6904275340662182e-06, "loss": 2.6409284591674806, "step": 113420 }, { "epoch": 0.915621998175698, "grad_norm": 3.52445387840271, "learning_rate": 1.688812064328005e-06, "loss": 2.6624074935913087, "step": 113430 }, { "epoch": 0.9157027195014651, "grad_norm": 4.983597755432129, "learning_rate": 1.687196594589792e-06, "loss": 2.313614845275879, "step": 113440 }, { "epoch": 0.9157834408272322, "grad_norm": 4.528008937835693, "learning_rate": 1.685581124851579e-06, "loss": 2.115904998779297, "step": 113450 }, { "epoch": 0.9158641621529992, "grad_norm": 4.604345321655273, "learning_rate": 1.6839656551133657e-06, "loss": 2.2556638717651367, "step": 113460 }, { "epoch": 0.9159448834787662, "grad_norm": 4.4184041023254395, "learning_rate": 1.6823501853751526e-06, "loss": 2.7016021728515627, "step": 113470 }, { "epoch": 0.9160256048045333, "grad_norm": 2.7851250171661377, "learning_rate": 1.6807347156369394e-06, "loss": 2.4004190444946287, "step": 113480 }, { "epoch": 0.9161063261303004, "grad_norm": 3.0996367931365967, "learning_rate": 1.6791192458987263e-06, "loss": 2.0827836990356445, "step": 113490 }, { "epoch": 0.9161870474560674, "grad_norm": 3.676557779312134, "learning_rate": 1.6775037761605131e-06, "loss": 2.6715831756591797, "step": 113500 }, { "epoch": 0.9162677687818345, "grad_norm": 5.415785789489746, "learning_rate": 1.6758883064223e-06, "loss": 2.2815961837768555, "step": 113510 }, { "epoch": 0.9163484901076016, "grad_norm": 2.9942572116851807, "learning_rate": 1.6742728366840868e-06, "loss": 2.1539215087890624, "step": 113520 }, { "epoch": 0.9164292114333685, "grad_norm": 3.8352725505828857, "learning_rate": 1.6726573669458737e-06, "loss": 2.4980093002319337, "step": 113530 }, { "epoch": 0.9165099327591356, "grad_norm": 4.125062465667725, "learning_rate": 1.6710418972076605e-06, "loss": 2.8249237060546877, "step": 113540 }, { "epoch": 0.9165906540849027, "grad_norm": 2.828545093536377, "learning_rate": 1.6694264274694474e-06, "loss": 2.273743438720703, "step": 113550 }, { "epoch": 0.9166713754106698, "grad_norm": 3.9665474891662598, "learning_rate": 1.6678109577312342e-06, "loss": 2.4697866439819336, "step": 113560 }, { "epoch": 0.9167520967364368, "grad_norm": 4.196365833282471, "learning_rate": 1.6661954879930215e-06, "loss": 2.7104619979858398, "step": 113570 }, { "epoch": 0.9168328180622038, "grad_norm": 4.347589492797852, "learning_rate": 1.6645800182548083e-06, "loss": 2.5698724746704102, "step": 113580 }, { "epoch": 0.9169135393879709, "grad_norm": 3.0955004692077637, "learning_rate": 1.6629645485165952e-06, "loss": 2.173246383666992, "step": 113590 }, { "epoch": 0.9169942607137379, "grad_norm": 6.094313144683838, "learning_rate": 1.661349078778382e-06, "loss": 2.269487190246582, "step": 113600 }, { "epoch": 0.917074982039505, "grad_norm": 3.7099547386169434, "learning_rate": 1.6597336090401689e-06, "loss": 2.4509754180908203, "step": 113610 }, { "epoch": 0.9171557033652721, "grad_norm": 2.9547407627105713, "learning_rate": 1.6581181393019557e-06, "loss": 2.1615339279174806, "step": 113620 }, { "epoch": 0.9172364246910392, "grad_norm": 3.479231119155884, "learning_rate": 1.6565026695637426e-06, "loss": 2.2037946701049806, "step": 113630 }, { "epoch": 0.9173171460168061, "grad_norm": 2.5948398113250732, "learning_rate": 1.6548871998255294e-06, "loss": 1.8107370376586913, "step": 113640 }, { "epoch": 0.9173978673425732, "grad_norm": 3.7490577697753906, "learning_rate": 1.6532717300873162e-06, "loss": 1.9149141311645508, "step": 113650 }, { "epoch": 0.9174785886683403, "grad_norm": 2.171619176864624, "learning_rate": 1.6516562603491033e-06, "loss": 2.095778465270996, "step": 113660 }, { "epoch": 0.9175593099941074, "grad_norm": 4.438429832458496, "learning_rate": 1.6500407906108901e-06, "loss": 2.333975410461426, "step": 113670 }, { "epoch": 0.9176400313198744, "grad_norm": 5.198433876037598, "learning_rate": 1.648425320872677e-06, "loss": 3.138765335083008, "step": 113680 }, { "epoch": 0.9177207526456415, "grad_norm": 3.555126667022705, "learning_rate": 1.6468098511344638e-06, "loss": 2.2282459259033205, "step": 113690 }, { "epoch": 0.9178014739714085, "grad_norm": 5.819677352905273, "learning_rate": 1.6451943813962507e-06, "loss": 2.304913139343262, "step": 113700 }, { "epoch": 0.9178821952971755, "grad_norm": 4.8538126945495605, "learning_rate": 1.6435789116580375e-06, "loss": 2.6326196670532225, "step": 113710 }, { "epoch": 0.9179629166229426, "grad_norm": 7.078052997589111, "learning_rate": 1.6419634419198244e-06, "loss": 2.8251901626586915, "step": 113720 }, { "epoch": 0.9180436379487097, "grad_norm": 4.378780364990234, "learning_rate": 1.6403479721816112e-06, "loss": 2.7045289993286135, "step": 113730 }, { "epoch": 0.9181243592744768, "grad_norm": 4.371597766876221, "learning_rate": 1.638732502443398e-06, "loss": 2.1853221893310546, "step": 113740 }, { "epoch": 0.9182050806002438, "grad_norm": 3.311418294906616, "learning_rate": 1.637117032705185e-06, "loss": 2.1456031799316406, "step": 113750 }, { "epoch": 0.9182858019260108, "grad_norm": 2.6067302227020264, "learning_rate": 1.6355015629669718e-06, "loss": 2.31680850982666, "step": 113760 }, { "epoch": 0.9183665232517779, "grad_norm": 3.4621458053588867, "learning_rate": 1.6338860932287586e-06, "loss": 2.2112499237060548, "step": 113770 }, { "epoch": 0.9184472445775449, "grad_norm": 3.5372061729431152, "learning_rate": 1.6322706234905454e-06, "loss": 1.9067644119262694, "step": 113780 }, { "epoch": 0.918527965903312, "grad_norm": 2.898569345474243, "learning_rate": 1.6306551537523325e-06, "loss": 2.2607593536376953, "step": 113790 }, { "epoch": 0.9186086872290791, "grad_norm": 10.133644104003906, "learning_rate": 1.6290396840141194e-06, "loss": 2.822806167602539, "step": 113800 }, { "epoch": 0.9186894085548462, "grad_norm": 4.656988143920898, "learning_rate": 1.6274242142759062e-06, "loss": 2.1379180908203126, "step": 113810 }, { "epoch": 0.9187701298806131, "grad_norm": 3.763995409011841, "learning_rate": 1.625808744537693e-06, "loss": 2.676003837585449, "step": 113820 }, { "epoch": 0.9188508512063802, "grad_norm": 4.208780288696289, "learning_rate": 1.6241932747994799e-06, "loss": 1.9157892227172852, "step": 113830 }, { "epoch": 0.9189315725321473, "grad_norm": 5.761199474334717, "learning_rate": 1.6225778050612667e-06, "loss": 2.5363616943359375, "step": 113840 }, { "epoch": 0.9190122938579143, "grad_norm": 9.169196128845215, "learning_rate": 1.6209623353230536e-06, "loss": 1.6858854293823242, "step": 113850 }, { "epoch": 0.9190930151836814, "grad_norm": 3.6265156269073486, "learning_rate": 1.6193468655848404e-06, "loss": 1.8341259002685546, "step": 113860 }, { "epoch": 0.9191737365094484, "grad_norm": 4.854389190673828, "learning_rate": 1.6177313958466273e-06, "loss": 1.994818115234375, "step": 113870 }, { "epoch": 0.9192544578352155, "grad_norm": 4.820351600646973, "learning_rate": 1.6161159261084141e-06, "loss": 2.2465131759643553, "step": 113880 }, { "epoch": 0.9193351791609825, "grad_norm": 3.2580273151397705, "learning_rate": 1.6145004563702014e-06, "loss": 2.3591278076171873, "step": 113890 }, { "epoch": 0.9194159004867496, "grad_norm": 4.294199466705322, "learning_rate": 1.6128849866319882e-06, "loss": 1.7361438751220704, "step": 113900 }, { "epoch": 0.9194966218125167, "grad_norm": 3.484541177749634, "learning_rate": 1.611269516893775e-06, "loss": 2.292098617553711, "step": 113910 }, { "epoch": 0.9195773431382837, "grad_norm": 5.065290451049805, "learning_rate": 1.609654047155562e-06, "loss": 2.001115608215332, "step": 113920 }, { "epoch": 0.9196580644640507, "grad_norm": 3.563899278640747, "learning_rate": 1.6080385774173488e-06, "loss": 1.9399639129638673, "step": 113930 }, { "epoch": 0.9197387857898178, "grad_norm": 4.784797668457031, "learning_rate": 1.6064231076791356e-06, "loss": 2.8356948852539063, "step": 113940 }, { "epoch": 0.9198195071155849, "grad_norm": 4.099924564361572, "learning_rate": 1.6048076379409225e-06, "loss": 2.1696140289306642, "step": 113950 }, { "epoch": 0.9199002284413519, "grad_norm": 3.1037325859069824, "learning_rate": 1.6031921682027093e-06, "loss": 2.1248144149780273, "step": 113960 }, { "epoch": 0.919980949767119, "grad_norm": 3.7771246433258057, "learning_rate": 1.6015766984644962e-06, "loss": 2.432634162902832, "step": 113970 }, { "epoch": 0.9200616710928861, "grad_norm": 3.9835891723632812, "learning_rate": 1.599961228726283e-06, "loss": 2.3057918548583984, "step": 113980 }, { "epoch": 0.920142392418653, "grad_norm": 3.962656021118164, "learning_rate": 1.5983457589880698e-06, "loss": 2.252834510803223, "step": 113990 }, { "epoch": 0.9202231137444201, "grad_norm": 8.091143608093262, "learning_rate": 1.5967302892498567e-06, "loss": 2.5343391418457033, "step": 114000 }, { "epoch": 0.9203038350701872, "grad_norm": 3.190525770187378, "learning_rate": 1.5951148195116437e-06, "loss": 2.644308662414551, "step": 114010 }, { "epoch": 0.9203845563959543, "grad_norm": 4.131747722625732, "learning_rate": 1.5934993497734306e-06, "loss": 2.180876350402832, "step": 114020 }, { "epoch": 0.9204652777217213, "grad_norm": 3.141871452331543, "learning_rate": 1.5918838800352174e-06, "loss": 2.7198535919189455, "step": 114030 }, { "epoch": 0.9205459990474884, "grad_norm": 4.776580333709717, "learning_rate": 1.5902684102970043e-06, "loss": 2.918092155456543, "step": 114040 }, { "epoch": 0.9206267203732554, "grad_norm": 2.0267271995544434, "learning_rate": 1.5886529405587911e-06, "loss": 2.0804817199707033, "step": 114050 }, { "epoch": 0.9207074416990224, "grad_norm": 5.772120952606201, "learning_rate": 1.587037470820578e-06, "loss": 2.0475135803222657, "step": 114060 }, { "epoch": 0.9207881630247895, "grad_norm": 6.231616497039795, "learning_rate": 1.5854220010823648e-06, "loss": 2.499300003051758, "step": 114070 }, { "epoch": 0.9208688843505566, "grad_norm": 3.0629966259002686, "learning_rate": 1.5838065313441517e-06, "loss": 2.503468322753906, "step": 114080 }, { "epoch": 0.9209496056763237, "grad_norm": 3.4185431003570557, "learning_rate": 1.5821910616059385e-06, "loss": 2.5785806655883787, "step": 114090 }, { "epoch": 0.9210303270020906, "grad_norm": 3.894012212753296, "learning_rate": 1.5805755918677254e-06, "loss": 2.4912765502929686, "step": 114100 }, { "epoch": 0.9211110483278577, "grad_norm": 3.9847302436828613, "learning_rate": 1.5789601221295122e-06, "loss": 2.122834396362305, "step": 114110 }, { "epoch": 0.9211917696536248, "grad_norm": 4.170940399169922, "learning_rate": 1.577344652391299e-06, "loss": 2.518402099609375, "step": 114120 }, { "epoch": 0.9212724909793919, "grad_norm": 6.736671447753906, "learning_rate": 1.5757291826530859e-06, "loss": 2.3219669342041014, "step": 114130 }, { "epoch": 0.9213532123051589, "grad_norm": 4.148315906524658, "learning_rate": 1.574113712914873e-06, "loss": 2.7776462554931642, "step": 114140 }, { "epoch": 0.921433933630926, "grad_norm": 3.8572728633880615, "learning_rate": 1.5724982431766598e-06, "loss": 1.9732341766357422, "step": 114150 }, { "epoch": 0.921514654956693, "grad_norm": 3.6805644035339355, "learning_rate": 1.5708827734384466e-06, "loss": 2.5182714462280273, "step": 114160 }, { "epoch": 0.92159537628246, "grad_norm": 3.2582805156707764, "learning_rate": 1.5692673037002335e-06, "loss": 2.2886690139770507, "step": 114170 }, { "epoch": 0.9216760976082271, "grad_norm": 2.3244619369506836, "learning_rate": 1.5676518339620203e-06, "loss": 2.1100467681884765, "step": 114180 }, { "epoch": 0.9217568189339942, "grad_norm": 5.026308059692383, "learning_rate": 1.5660363642238072e-06, "loss": 2.2845888137817383, "step": 114190 }, { "epoch": 0.9218375402597613, "grad_norm": 4.274500370025635, "learning_rate": 1.5644208944855942e-06, "loss": 2.650263214111328, "step": 114200 }, { "epoch": 0.9219182615855283, "grad_norm": 5.079118728637695, "learning_rate": 1.562805424747381e-06, "loss": 2.9021127700805662, "step": 114210 }, { "epoch": 0.9219989829112953, "grad_norm": 3.235291004180908, "learning_rate": 1.5611899550091681e-06, "loss": 2.0543359756469726, "step": 114220 }, { "epoch": 0.9220797042370624, "grad_norm": 4.894282341003418, "learning_rate": 1.559574485270955e-06, "loss": 2.1394142150878905, "step": 114230 }, { "epoch": 0.9221604255628294, "grad_norm": 3.7383456230163574, "learning_rate": 1.5579590155327418e-06, "loss": 2.3316532135009767, "step": 114240 }, { "epoch": 0.9222411468885965, "grad_norm": 3.5721867084503174, "learning_rate": 1.5563435457945287e-06, "loss": 1.940962791442871, "step": 114250 }, { "epoch": 0.9223218682143636, "grad_norm": 3.0370569229125977, "learning_rate": 1.5547280760563155e-06, "loss": 2.354666900634766, "step": 114260 }, { "epoch": 0.9224025895401307, "grad_norm": 2.5563747882843018, "learning_rate": 1.5531126063181024e-06, "loss": 2.4119289398193358, "step": 114270 }, { "epoch": 0.9224833108658976, "grad_norm": 4.563459396362305, "learning_rate": 1.5514971365798892e-06, "loss": 2.5025339126586914, "step": 114280 }, { "epoch": 0.9225640321916647, "grad_norm": 3.7651638984680176, "learning_rate": 1.549881666841676e-06, "loss": 2.6447517395019533, "step": 114290 }, { "epoch": 0.9226447535174318, "grad_norm": 4.122200965881348, "learning_rate": 1.548266197103463e-06, "loss": 2.7570648193359375, "step": 114300 }, { "epoch": 0.9227254748431988, "grad_norm": 4.6640214920043945, "learning_rate": 1.5466507273652498e-06, "loss": 1.9693708419799805, "step": 114310 }, { "epoch": 0.9228061961689659, "grad_norm": 4.5402140617370605, "learning_rate": 1.5450352576270366e-06, "loss": 2.390579414367676, "step": 114320 }, { "epoch": 0.922886917494733, "grad_norm": 5.174021244049072, "learning_rate": 1.5434197878888234e-06, "loss": 2.095050239562988, "step": 114330 }, { "epoch": 0.9229676388205, "grad_norm": 2.756559133529663, "learning_rate": 1.5418043181506103e-06, "loss": 1.9995050430297852, "step": 114340 }, { "epoch": 0.923048360146267, "grad_norm": 3.1402363777160645, "learning_rate": 1.5401888484123973e-06, "loss": 1.881627655029297, "step": 114350 }, { "epoch": 0.9231290814720341, "grad_norm": 2.0510027408599854, "learning_rate": 1.5385733786741842e-06, "loss": 2.768137741088867, "step": 114360 }, { "epoch": 0.9232098027978012, "grad_norm": 6.348608493804932, "learning_rate": 1.536957908935971e-06, "loss": 2.643519973754883, "step": 114370 }, { "epoch": 0.9232905241235682, "grad_norm": 3.6378376483917236, "learning_rate": 1.5353424391977579e-06, "loss": 2.1489564895629885, "step": 114380 }, { "epoch": 0.9233712454493352, "grad_norm": 2.5877766609191895, "learning_rate": 1.5337269694595447e-06, "loss": 2.2636510848999025, "step": 114390 }, { "epoch": 0.9234519667751023, "grad_norm": 2.719435930252075, "learning_rate": 1.5321114997213316e-06, "loss": 2.190149688720703, "step": 114400 }, { "epoch": 0.9235326881008694, "grad_norm": 3.5229337215423584, "learning_rate": 1.5304960299831184e-06, "loss": 2.543103790283203, "step": 114410 }, { "epoch": 0.9236134094266364, "grad_norm": 4.50472354888916, "learning_rate": 1.5288805602449053e-06, "loss": 2.457373046875, "step": 114420 }, { "epoch": 0.9236941307524035, "grad_norm": 5.432377338409424, "learning_rate": 1.5272650905066921e-06, "loss": 2.238886833190918, "step": 114430 }, { "epoch": 0.9237748520781706, "grad_norm": 3.8102805614471436, "learning_rate": 1.525649620768479e-06, "loss": 2.0963958740234374, "step": 114440 }, { "epoch": 0.9238555734039375, "grad_norm": 2.4662888050079346, "learning_rate": 1.5240341510302658e-06, "loss": 1.9052509307861327, "step": 114450 }, { "epoch": 0.9239362947297046, "grad_norm": 5.4989471435546875, "learning_rate": 1.5224186812920526e-06, "loss": 2.3213743209838866, "step": 114460 }, { "epoch": 0.9240170160554717, "grad_norm": 7.078558444976807, "learning_rate": 1.5208032115538395e-06, "loss": 2.3255636215209963, "step": 114470 }, { "epoch": 0.9240977373812388, "grad_norm": 1.5983943939208984, "learning_rate": 1.5191877418156266e-06, "loss": 2.306036186218262, "step": 114480 }, { "epoch": 0.9241784587070058, "grad_norm": 3.39046573638916, "learning_rate": 1.5175722720774134e-06, "loss": 2.500376892089844, "step": 114490 }, { "epoch": 0.9242591800327729, "grad_norm": 5.691641330718994, "learning_rate": 1.5159568023392002e-06, "loss": 2.435289764404297, "step": 114500 }, { "epoch": 0.9243399013585399, "grad_norm": 2.3577778339385986, "learning_rate": 1.5143413326009873e-06, "loss": 2.349638748168945, "step": 114510 }, { "epoch": 0.9244206226843069, "grad_norm": 3.4819135665893555, "learning_rate": 1.5127258628627741e-06, "loss": 2.3250757217407227, "step": 114520 }, { "epoch": 0.924501344010074, "grad_norm": 3.4232659339904785, "learning_rate": 1.511110393124561e-06, "loss": 2.1245330810546874, "step": 114530 }, { "epoch": 0.9245820653358411, "grad_norm": 2.958812952041626, "learning_rate": 1.5094949233863478e-06, "loss": 1.9991926193237304, "step": 114540 }, { "epoch": 0.9246627866616082, "grad_norm": 1.4988263845443726, "learning_rate": 1.5078794536481347e-06, "loss": 2.370512771606445, "step": 114550 }, { "epoch": 0.9247435079873751, "grad_norm": 3.468003988265991, "learning_rate": 1.5062639839099215e-06, "loss": 2.1453577041625977, "step": 114560 }, { "epoch": 0.9248242293131422, "grad_norm": 3.634303569793701, "learning_rate": 1.5046485141717086e-06, "loss": 2.09470157623291, "step": 114570 }, { "epoch": 0.9249049506389093, "grad_norm": 4.276054382324219, "learning_rate": 1.5030330444334954e-06, "loss": 2.1755441665649413, "step": 114580 }, { "epoch": 0.9249856719646763, "grad_norm": 6.761447429656982, "learning_rate": 1.5014175746952823e-06, "loss": 2.5729318618774415, "step": 114590 }, { "epoch": 0.9250663932904434, "grad_norm": 3.8043649196624756, "learning_rate": 1.4998021049570691e-06, "loss": 2.2308530807495117, "step": 114600 }, { "epoch": 0.9251471146162105, "grad_norm": 5.296950340270996, "learning_rate": 1.498186635218856e-06, "loss": 2.3876420974731447, "step": 114610 }, { "epoch": 0.9252278359419775, "grad_norm": 2.240490198135376, "learning_rate": 1.4965711654806428e-06, "loss": 2.580001640319824, "step": 114620 }, { "epoch": 0.9253085572677445, "grad_norm": 4.054125785827637, "learning_rate": 1.4949556957424297e-06, "loss": 1.994578742980957, "step": 114630 }, { "epoch": 0.9253892785935116, "grad_norm": 3.5109710693359375, "learning_rate": 1.4933402260042165e-06, "loss": 2.364775276184082, "step": 114640 }, { "epoch": 0.9254699999192787, "grad_norm": 4.84547758102417, "learning_rate": 1.4917247562660034e-06, "loss": 2.4217449188232423, "step": 114650 }, { "epoch": 0.9255507212450458, "grad_norm": 3.6629798412323, "learning_rate": 1.4901092865277902e-06, "loss": 2.4660566329956053, "step": 114660 }, { "epoch": 0.9256314425708128, "grad_norm": 4.934114933013916, "learning_rate": 1.488493816789577e-06, "loss": 2.126299285888672, "step": 114670 }, { "epoch": 0.9257121638965798, "grad_norm": 3.910954236984253, "learning_rate": 1.4868783470513639e-06, "loss": 2.400196838378906, "step": 114680 }, { "epoch": 0.9257928852223469, "grad_norm": 4.224709987640381, "learning_rate": 1.4852628773131507e-06, "loss": 2.6066003799438477, "step": 114690 }, { "epoch": 0.9258736065481139, "grad_norm": 4.193264484405518, "learning_rate": 1.4836474075749378e-06, "loss": 2.257695960998535, "step": 114700 }, { "epoch": 0.925954327873881, "grad_norm": 3.119811773300171, "learning_rate": 1.4820319378367246e-06, "loss": 2.1460628509521484, "step": 114710 }, { "epoch": 0.9260350491996481, "grad_norm": 6.006170749664307, "learning_rate": 1.4804164680985115e-06, "loss": 2.004112434387207, "step": 114720 }, { "epoch": 0.9261157705254152, "grad_norm": 18.823444366455078, "learning_rate": 1.4788009983602983e-06, "loss": 2.7963560104370115, "step": 114730 }, { "epoch": 0.9261964918511821, "grad_norm": 2.577613353729248, "learning_rate": 1.4771855286220852e-06, "loss": 2.5602466583251955, "step": 114740 }, { "epoch": 0.9262772131769492, "grad_norm": 1.6225035190582275, "learning_rate": 1.475570058883872e-06, "loss": 2.325012969970703, "step": 114750 }, { "epoch": 0.9263579345027163, "grad_norm": 4.22934627532959, "learning_rate": 1.4739545891456589e-06, "loss": 2.1202861785888674, "step": 114760 }, { "epoch": 0.9264386558284833, "grad_norm": 4.054572105407715, "learning_rate": 1.4723391194074457e-06, "loss": 2.183930778503418, "step": 114770 }, { "epoch": 0.9265193771542504, "grad_norm": 6.448879718780518, "learning_rate": 1.4707236496692326e-06, "loss": 2.29538516998291, "step": 114780 }, { "epoch": 0.9266000984800175, "grad_norm": 4.28525447845459, "learning_rate": 1.4691081799310194e-06, "loss": 2.406654930114746, "step": 114790 }, { "epoch": 0.9266808198057845, "grad_norm": 3.0178909301757812, "learning_rate": 1.4674927101928062e-06, "loss": 2.5386613845825194, "step": 114800 }, { "epoch": 0.9267615411315515, "grad_norm": 2.1638832092285156, "learning_rate": 1.465877240454593e-06, "loss": 2.521078872680664, "step": 114810 }, { "epoch": 0.9268422624573186, "grad_norm": 4.818607330322266, "learning_rate": 1.46426177071638e-06, "loss": 2.6062950134277343, "step": 114820 }, { "epoch": 0.9269229837830857, "grad_norm": 3.70235538482666, "learning_rate": 1.4626463009781672e-06, "loss": 2.540309524536133, "step": 114830 }, { "epoch": 0.9270037051088527, "grad_norm": 3.37065052986145, "learning_rate": 1.461030831239954e-06, "loss": 2.57079963684082, "step": 114840 }, { "epoch": 0.9270844264346197, "grad_norm": 6.858484745025635, "learning_rate": 1.459415361501741e-06, "loss": 2.5801258087158203, "step": 114850 }, { "epoch": 0.9271651477603868, "grad_norm": 5.674017429351807, "learning_rate": 1.4577998917635277e-06, "loss": 2.2147260665893556, "step": 114860 }, { "epoch": 0.9272458690861539, "grad_norm": 7.645137310028076, "learning_rate": 1.4561844220253146e-06, "loss": 2.6234790802001955, "step": 114870 }, { "epoch": 0.9273265904119209, "grad_norm": 6.032751560211182, "learning_rate": 1.4545689522871014e-06, "loss": 2.605206298828125, "step": 114880 }, { "epoch": 0.927407311737688, "grad_norm": 6.318601131439209, "learning_rate": 1.4529534825488883e-06, "loss": 1.9226030349731444, "step": 114890 }, { "epoch": 0.9274880330634551, "grad_norm": 2.6755151748657227, "learning_rate": 1.4513380128106751e-06, "loss": 2.1830318450927733, "step": 114900 }, { "epoch": 0.927568754389222, "grad_norm": 3.150757074356079, "learning_rate": 1.449722543072462e-06, "loss": 2.2758258819580077, "step": 114910 }, { "epoch": 0.9276494757149891, "grad_norm": 4.9106011390686035, "learning_rate": 1.448107073334249e-06, "loss": 2.2559955596923826, "step": 114920 }, { "epoch": 0.9277301970407562, "grad_norm": 3.7143044471740723, "learning_rate": 1.4464916035960359e-06, "loss": 2.050735855102539, "step": 114930 }, { "epoch": 0.9278109183665233, "grad_norm": 2.951897144317627, "learning_rate": 1.4448761338578227e-06, "loss": 2.878458023071289, "step": 114940 }, { "epoch": 0.9278916396922903, "grad_norm": 3.744189500808716, "learning_rate": 1.4432606641196096e-06, "loss": 2.1478387832641603, "step": 114950 }, { "epoch": 0.9279723610180574, "grad_norm": 2.800539970397949, "learning_rate": 1.4416451943813964e-06, "loss": 2.3501325607299806, "step": 114960 }, { "epoch": 0.9280530823438244, "grad_norm": 3.2079358100891113, "learning_rate": 1.4400297246431833e-06, "loss": 2.8362316131591796, "step": 114970 }, { "epoch": 0.9281338036695914, "grad_norm": 3.5991811752319336, "learning_rate": 1.4384142549049701e-06, "loss": 2.052171325683594, "step": 114980 }, { "epoch": 0.9282145249953585, "grad_norm": 3.9345712661743164, "learning_rate": 1.436798785166757e-06, "loss": 2.3731353759765623, "step": 114990 }, { "epoch": 0.9282952463211256, "grad_norm": 3.295267343521118, "learning_rate": 1.4351833154285438e-06, "loss": 2.0120763778686523, "step": 115000 }, { "epoch": 0.9283759676468927, "grad_norm": 4.049602031707764, "learning_rate": 1.4335678456903306e-06, "loss": 2.0410694122314452, "step": 115010 }, { "epoch": 0.9284566889726596, "grad_norm": 3.854203939437866, "learning_rate": 1.4319523759521175e-06, "loss": 2.4502687454223633, "step": 115020 }, { "epoch": 0.9285374102984267, "grad_norm": 3.1528513431549072, "learning_rate": 1.4303369062139043e-06, "loss": 2.6329418182373048, "step": 115030 }, { "epoch": 0.9286181316241938, "grad_norm": 3.369901657104492, "learning_rate": 1.4287214364756912e-06, "loss": 2.6868354797363283, "step": 115040 }, { "epoch": 0.9286988529499608, "grad_norm": 5.323011875152588, "learning_rate": 1.4271059667374782e-06, "loss": 2.01826171875, "step": 115050 }, { "epoch": 0.9287795742757279, "grad_norm": 2.3180160522460938, "learning_rate": 1.425490496999265e-06, "loss": 2.2731704711914062, "step": 115060 }, { "epoch": 0.928860295601495, "grad_norm": 4.8155012130737305, "learning_rate": 1.423875027261052e-06, "loss": 2.5903398513793947, "step": 115070 }, { "epoch": 0.928941016927262, "grad_norm": 1.8658760786056519, "learning_rate": 1.4222595575228388e-06, "loss": 2.219595527648926, "step": 115080 }, { "epoch": 0.929021738253029, "grad_norm": 5.2596659660339355, "learning_rate": 1.4206440877846256e-06, "loss": 1.9780244827270508, "step": 115090 }, { "epoch": 0.9291024595787961, "grad_norm": 3.237318277359009, "learning_rate": 1.4190286180464125e-06, "loss": 1.9856325149536134, "step": 115100 }, { "epoch": 0.9291831809045632, "grad_norm": 3.3324592113494873, "learning_rate": 1.4174131483081993e-06, "loss": 2.238266372680664, "step": 115110 }, { "epoch": 0.9292639022303303, "grad_norm": 1.9962224960327148, "learning_rate": 1.4157976785699862e-06, "loss": 2.6960899353027346, "step": 115120 }, { "epoch": 0.9293446235560973, "grad_norm": 8.295565605163574, "learning_rate": 1.414182208831773e-06, "loss": 3.144623565673828, "step": 115130 }, { "epoch": 0.9294253448818643, "grad_norm": 3.94753098487854, "learning_rate": 1.4125667390935603e-06, "loss": 2.505484199523926, "step": 115140 }, { "epoch": 0.9295060662076314, "grad_norm": 4.433410167694092, "learning_rate": 1.4109512693553471e-06, "loss": 2.401514434814453, "step": 115150 }, { "epoch": 0.9295867875333984, "grad_norm": 3.548006057739258, "learning_rate": 1.409335799617134e-06, "loss": 2.1123361587524414, "step": 115160 }, { "epoch": 0.9296675088591655, "grad_norm": 3.197328567504883, "learning_rate": 1.4077203298789208e-06, "loss": 2.2463895797729494, "step": 115170 }, { "epoch": 0.9297482301849326, "grad_norm": 2.859292507171631, "learning_rate": 1.4061048601407077e-06, "loss": 2.242676544189453, "step": 115180 }, { "epoch": 0.9298289515106997, "grad_norm": 3.6870620250701904, "learning_rate": 1.4044893904024945e-06, "loss": 2.366191864013672, "step": 115190 }, { "epoch": 0.9299096728364666, "grad_norm": 4.157262325286865, "learning_rate": 1.4028739206642813e-06, "loss": 2.666528511047363, "step": 115200 }, { "epoch": 0.9299903941622337, "grad_norm": 6.1744208335876465, "learning_rate": 1.4012584509260682e-06, "loss": 2.2795379638671873, "step": 115210 }, { "epoch": 0.9300711154880008, "grad_norm": 4.002687454223633, "learning_rate": 1.399642981187855e-06, "loss": 2.590837287902832, "step": 115220 }, { "epoch": 0.9301518368137678, "grad_norm": 5.301192283630371, "learning_rate": 1.3980275114496419e-06, "loss": 2.3600854873657227, "step": 115230 }, { "epoch": 0.9302325581395349, "grad_norm": 1.9902218580245972, "learning_rate": 1.3964120417114287e-06, "loss": 2.0070247650146484, "step": 115240 }, { "epoch": 0.930313279465302, "grad_norm": 4.424663543701172, "learning_rate": 1.3947965719732156e-06, "loss": 2.18575439453125, "step": 115250 }, { "epoch": 0.930394000791069, "grad_norm": 3.006652355194092, "learning_rate": 1.3931811022350024e-06, "loss": 2.3488603591918946, "step": 115260 }, { "epoch": 0.930474722116836, "grad_norm": 3.622133255004883, "learning_rate": 1.3915656324967895e-06, "loss": 2.3932287216186525, "step": 115270 }, { "epoch": 0.9305554434426031, "grad_norm": 2.3097827434539795, "learning_rate": 1.3899501627585763e-06, "loss": 2.1123958587646485, "step": 115280 }, { "epoch": 0.9306361647683702, "grad_norm": 4.126400947570801, "learning_rate": 1.3883346930203632e-06, "loss": 2.477811813354492, "step": 115290 }, { "epoch": 0.9307168860941372, "grad_norm": 6.35334587097168, "learning_rate": 1.38671922328215e-06, "loss": 2.403642272949219, "step": 115300 }, { "epoch": 0.9307976074199042, "grad_norm": 3.798579692840576, "learning_rate": 1.3851037535439369e-06, "loss": 2.407797431945801, "step": 115310 }, { "epoch": 0.9308783287456713, "grad_norm": 4.609941005706787, "learning_rate": 1.3834882838057237e-06, "loss": 2.519329643249512, "step": 115320 }, { "epoch": 0.9309590500714384, "grad_norm": 4.075804233551025, "learning_rate": 1.3818728140675106e-06, "loss": 2.175740432739258, "step": 115330 }, { "epoch": 0.9310397713972054, "grad_norm": 2.4949951171875, "learning_rate": 1.3802573443292974e-06, "loss": 2.4376911163330077, "step": 115340 }, { "epoch": 0.9311204927229725, "grad_norm": 4.696411609649658, "learning_rate": 1.3786418745910842e-06, "loss": 2.305118751525879, "step": 115350 }, { "epoch": 0.9312012140487396, "grad_norm": 3.9425811767578125, "learning_rate": 1.377026404852871e-06, "loss": 2.318072509765625, "step": 115360 }, { "epoch": 0.9312819353745065, "grad_norm": 2.3515899181365967, "learning_rate": 1.375410935114658e-06, "loss": 2.2833183288574217, "step": 115370 }, { "epoch": 0.9313626567002736, "grad_norm": 4.227041721343994, "learning_rate": 1.3737954653764448e-06, "loss": 2.7029659271240236, "step": 115380 }, { "epoch": 0.9314433780260407, "grad_norm": 3.431298017501831, "learning_rate": 1.3721799956382316e-06, "loss": 2.309818458557129, "step": 115390 }, { "epoch": 0.9315240993518078, "grad_norm": 2.984476327896118, "learning_rate": 1.3705645259000187e-06, "loss": 2.3449832916259767, "step": 115400 }, { "epoch": 0.9316048206775748, "grad_norm": 3.124537706375122, "learning_rate": 1.3689490561618055e-06, "loss": 2.4551483154296876, "step": 115410 }, { "epoch": 0.9316855420033419, "grad_norm": 2.834897756576538, "learning_rate": 1.3673335864235924e-06, "loss": 2.7095409393310548, "step": 115420 }, { "epoch": 0.931766263329109, "grad_norm": 3.9235787391662598, "learning_rate": 1.3657181166853792e-06, "loss": 2.1483444213867187, "step": 115430 }, { "epoch": 0.9318469846548759, "grad_norm": 4.11380672454834, "learning_rate": 1.364102646947166e-06, "loss": 1.942799186706543, "step": 115440 }, { "epoch": 0.931927705980643, "grad_norm": 3.5168070793151855, "learning_rate": 1.3624871772089531e-06, "loss": 2.5139787673950194, "step": 115450 }, { "epoch": 0.9320084273064101, "grad_norm": 4.551575183868408, "learning_rate": 1.36087170747074e-06, "loss": 2.0232446670532225, "step": 115460 }, { "epoch": 0.9320891486321772, "grad_norm": 2.7345616817474365, "learning_rate": 1.3592562377325268e-06, "loss": 2.277198600769043, "step": 115470 }, { "epoch": 0.9321698699579442, "grad_norm": 2.5889875888824463, "learning_rate": 1.3576407679943139e-06, "loss": 2.3374259948730467, "step": 115480 }, { "epoch": 0.9322505912837112, "grad_norm": 4.819955348968506, "learning_rate": 1.3560252982561007e-06, "loss": 2.724321746826172, "step": 115490 }, { "epoch": 0.9323313126094783, "grad_norm": 2.907670497894287, "learning_rate": 1.3544098285178876e-06, "loss": 2.746540641784668, "step": 115500 }, { "epoch": 0.9324120339352453, "grad_norm": 3.0481698513031006, "learning_rate": 1.3527943587796744e-06, "loss": 2.7697141647338865, "step": 115510 }, { "epoch": 0.9324927552610124, "grad_norm": 5.495076656341553, "learning_rate": 1.3511788890414613e-06, "loss": 2.571019744873047, "step": 115520 }, { "epoch": 0.9325734765867795, "grad_norm": 3.7238168716430664, "learning_rate": 1.349563419303248e-06, "loss": 2.4622425079345702, "step": 115530 }, { "epoch": 0.9326541979125466, "grad_norm": 1.5877976417541504, "learning_rate": 1.347947949565035e-06, "loss": 2.059061622619629, "step": 115540 }, { "epoch": 0.9327349192383135, "grad_norm": 3.6096978187561035, "learning_rate": 1.3463324798268218e-06, "loss": 2.4041841506958006, "step": 115550 }, { "epoch": 0.9328156405640806, "grad_norm": 3.2354776859283447, "learning_rate": 1.3447170100886086e-06, "loss": 2.659715270996094, "step": 115560 }, { "epoch": 0.9328963618898477, "grad_norm": 4.027751445770264, "learning_rate": 1.3431015403503955e-06, "loss": 2.6421720504760744, "step": 115570 }, { "epoch": 0.9329770832156148, "grad_norm": 2.6765661239624023, "learning_rate": 1.3414860706121823e-06, "loss": 2.6024152755737306, "step": 115580 }, { "epoch": 0.9330578045413818, "grad_norm": 2.743699073791504, "learning_rate": 1.3398706008739692e-06, "loss": 2.978547286987305, "step": 115590 }, { "epoch": 0.9331385258671488, "grad_norm": 2.6559505462646484, "learning_rate": 1.338255131135756e-06, "loss": 2.443609428405762, "step": 115600 }, { "epoch": 0.9332192471929159, "grad_norm": 3.851468324661255, "learning_rate": 1.336639661397543e-06, "loss": 2.4564064025878904, "step": 115610 }, { "epoch": 0.9332999685186829, "grad_norm": 2.880833864212036, "learning_rate": 1.33502419165933e-06, "loss": 2.4108489990234374, "step": 115620 }, { "epoch": 0.93338068984445, "grad_norm": 2.5454800128936768, "learning_rate": 1.3334087219211168e-06, "loss": 2.619719314575195, "step": 115630 }, { "epoch": 0.9334614111702171, "grad_norm": 3.5316741466522217, "learning_rate": 1.3317932521829036e-06, "loss": 2.101420211791992, "step": 115640 }, { "epoch": 0.9335421324959842, "grad_norm": 3.7557644844055176, "learning_rate": 1.3301777824446905e-06, "loss": 2.507534217834473, "step": 115650 }, { "epoch": 0.9336228538217511, "grad_norm": 2.4525647163391113, "learning_rate": 1.3285623127064773e-06, "loss": 2.4676342010498047, "step": 115660 }, { "epoch": 0.9337035751475182, "grad_norm": 4.231429576873779, "learning_rate": 1.3269468429682642e-06, "loss": 2.4360265731811523, "step": 115670 }, { "epoch": 0.9337842964732853, "grad_norm": 3.4509143829345703, "learning_rate": 1.325331373230051e-06, "loss": 1.856797981262207, "step": 115680 }, { "epoch": 0.9338650177990523, "grad_norm": 1.488531470298767, "learning_rate": 1.3237159034918378e-06, "loss": 2.1103931427001954, "step": 115690 }, { "epoch": 0.9339457391248194, "grad_norm": 2.611647605895996, "learning_rate": 1.3221004337536247e-06, "loss": 2.4799461364746094, "step": 115700 }, { "epoch": 0.9340264604505865, "grad_norm": 3.583606243133545, "learning_rate": 1.3204849640154115e-06, "loss": 2.0730222702026366, "step": 115710 }, { "epoch": 0.9341071817763535, "grad_norm": 4.792372703552246, "learning_rate": 1.3188694942771984e-06, "loss": 2.681747817993164, "step": 115720 }, { "epoch": 0.9341879031021205, "grad_norm": 2.073763370513916, "learning_rate": 1.3172540245389852e-06, "loss": 2.4231636047363283, "step": 115730 }, { "epoch": 0.9342686244278876, "grad_norm": 8.542563438415527, "learning_rate": 1.3156385548007723e-06, "loss": 2.582292175292969, "step": 115740 }, { "epoch": 0.9343493457536547, "grad_norm": 4.2578301429748535, "learning_rate": 1.3140230850625591e-06, "loss": 2.156059455871582, "step": 115750 }, { "epoch": 0.9344300670794217, "grad_norm": 2.8314242362976074, "learning_rate": 1.312407615324346e-06, "loss": 1.9352054595947266, "step": 115760 }, { "epoch": 0.9345107884051888, "grad_norm": 5.373146057128906, "learning_rate": 1.310792145586133e-06, "loss": 2.231863594055176, "step": 115770 }, { "epoch": 0.9345915097309558, "grad_norm": 3.5484752655029297, "learning_rate": 1.3091766758479199e-06, "loss": 2.1793121337890624, "step": 115780 }, { "epoch": 0.9346722310567229, "grad_norm": 3.3883509635925293, "learning_rate": 1.3075612061097067e-06, "loss": 2.4068851470947266, "step": 115790 }, { "epoch": 0.9347529523824899, "grad_norm": 4.9284186363220215, "learning_rate": 1.3059457363714936e-06, "loss": 2.671767234802246, "step": 115800 }, { "epoch": 0.934833673708257, "grad_norm": 2.9781441688537598, "learning_rate": 1.3043302666332804e-06, "loss": 2.176981735229492, "step": 115810 }, { "epoch": 0.9349143950340241, "grad_norm": 2.649447441101074, "learning_rate": 1.3027147968950673e-06, "loss": 2.214286994934082, "step": 115820 }, { "epoch": 0.934995116359791, "grad_norm": 3.5944061279296875, "learning_rate": 1.3010993271568543e-06, "loss": 2.180555725097656, "step": 115830 }, { "epoch": 0.9350758376855581, "grad_norm": 5.534553050994873, "learning_rate": 1.2994838574186412e-06, "loss": 2.434402656555176, "step": 115840 }, { "epoch": 0.9351565590113252, "grad_norm": 5.500908374786377, "learning_rate": 1.297868387680428e-06, "loss": 2.007633018493652, "step": 115850 }, { "epoch": 0.9352372803370923, "grad_norm": 2.687297821044922, "learning_rate": 1.2962529179422149e-06, "loss": 2.156790542602539, "step": 115860 }, { "epoch": 0.9353180016628593, "grad_norm": 3.9476733207702637, "learning_rate": 1.2946374482040017e-06, "loss": 2.6143468856811523, "step": 115870 }, { "epoch": 0.9353987229886264, "grad_norm": 4.014890670776367, "learning_rate": 1.2930219784657885e-06, "loss": 2.383562469482422, "step": 115880 }, { "epoch": 0.9354794443143934, "grad_norm": 2.963399648666382, "learning_rate": 1.2914065087275754e-06, "loss": 1.7697120666503907, "step": 115890 }, { "epoch": 0.9355601656401604, "grad_norm": 3.5475893020629883, "learning_rate": 1.2897910389893622e-06, "loss": 2.6147911071777346, "step": 115900 }, { "epoch": 0.9356408869659275, "grad_norm": 3.652618169784546, "learning_rate": 1.288175569251149e-06, "loss": 2.6286014556884765, "step": 115910 }, { "epoch": 0.9357216082916946, "grad_norm": 2.9337146282196045, "learning_rate": 1.286560099512936e-06, "loss": 1.8239557266235351, "step": 115920 }, { "epoch": 0.9358023296174617, "grad_norm": 4.028540134429932, "learning_rate": 1.2849446297747228e-06, "loss": 2.8117753982543947, "step": 115930 }, { "epoch": 0.9358830509432287, "grad_norm": 4.6378679275512695, "learning_rate": 1.2833291600365096e-06, "loss": 2.3190258026123045, "step": 115940 }, { "epoch": 0.9359637722689957, "grad_norm": 8.41256332397461, "learning_rate": 1.2817136902982965e-06, "loss": 2.401449203491211, "step": 115950 }, { "epoch": 0.9360444935947628, "grad_norm": 3.840186834335327, "learning_rate": 1.2800982205600835e-06, "loss": 2.698460578918457, "step": 115960 }, { "epoch": 0.9361252149205298, "grad_norm": 2.4660403728485107, "learning_rate": 1.2784827508218704e-06, "loss": 2.999254608154297, "step": 115970 }, { "epoch": 0.9362059362462969, "grad_norm": 2.0351474285125732, "learning_rate": 1.2768672810836572e-06, "loss": 2.106321907043457, "step": 115980 }, { "epoch": 0.936286657572064, "grad_norm": 6.2671661376953125, "learning_rate": 1.275251811345444e-06, "loss": 2.5449459075927736, "step": 115990 }, { "epoch": 0.9363673788978311, "grad_norm": 5.78071928024292, "learning_rate": 1.273636341607231e-06, "loss": 2.592974090576172, "step": 116000 }, { "epoch": 0.936448100223598, "grad_norm": 5.435791015625, "learning_rate": 1.2720208718690178e-06, "loss": 2.275951385498047, "step": 116010 }, { "epoch": 0.9365288215493651, "grad_norm": 3.6583001613616943, "learning_rate": 1.2704054021308046e-06, "loss": 2.6983943939208985, "step": 116020 }, { "epoch": 0.9366095428751322, "grad_norm": 7.323951721191406, "learning_rate": 1.2687899323925914e-06, "loss": 2.1094070434570313, "step": 116030 }, { "epoch": 0.9366902642008992, "grad_norm": 3.902491569519043, "learning_rate": 1.2671744626543783e-06, "loss": 2.3682182312011717, "step": 116040 }, { "epoch": 0.9367709855266663, "grad_norm": 3.9473068714141846, "learning_rate": 1.2655589929161651e-06, "loss": 2.12158317565918, "step": 116050 }, { "epoch": 0.9368517068524334, "grad_norm": 4.48066520690918, "learning_rate": 1.263943523177952e-06, "loss": 2.385732078552246, "step": 116060 }, { "epoch": 0.9369324281782004, "grad_norm": 3.05770206451416, "learning_rate": 1.2623280534397388e-06, "loss": 2.1461612701416017, "step": 116070 }, { "epoch": 0.9370131495039674, "grad_norm": 7.491367816925049, "learning_rate": 1.260712583701526e-06, "loss": 2.7220849990844727, "step": 116080 }, { "epoch": 0.9370938708297345, "grad_norm": 4.554332256317139, "learning_rate": 1.259097113963313e-06, "loss": 1.902249526977539, "step": 116090 }, { "epoch": 0.9371745921555016, "grad_norm": 3.5696640014648438, "learning_rate": 1.2574816442250998e-06, "loss": 2.582340621948242, "step": 116100 }, { "epoch": 0.9372553134812687, "grad_norm": 12.374107360839844, "learning_rate": 1.2558661744868866e-06, "loss": 2.5434890747070313, "step": 116110 }, { "epoch": 0.9373360348070356, "grad_norm": 5.151503086090088, "learning_rate": 1.2542507047486735e-06, "loss": 2.100337028503418, "step": 116120 }, { "epoch": 0.9374167561328027, "grad_norm": 3.2246954441070557, "learning_rate": 1.2526352350104603e-06, "loss": 2.3421199798583983, "step": 116130 }, { "epoch": 0.9374974774585698, "grad_norm": 2.3082683086395264, "learning_rate": 1.2510197652722472e-06, "loss": 2.046294403076172, "step": 116140 }, { "epoch": 0.9375781987843368, "grad_norm": 4.740237236022949, "learning_rate": 1.249404295534034e-06, "loss": 2.0250528335571287, "step": 116150 }, { "epoch": 0.9376589201101039, "grad_norm": 2.432705879211426, "learning_rate": 1.2477888257958209e-06, "loss": 2.573476028442383, "step": 116160 }, { "epoch": 0.937739641435871, "grad_norm": 4.083827018737793, "learning_rate": 1.2461733560576077e-06, "loss": 2.4150123596191406, "step": 116170 }, { "epoch": 0.937820362761638, "grad_norm": 3.276578664779663, "learning_rate": 1.2445578863193948e-06, "loss": 2.1072715759277343, "step": 116180 }, { "epoch": 0.937901084087405, "grad_norm": 2.7458419799804688, "learning_rate": 1.2429424165811816e-06, "loss": 2.4328136444091797, "step": 116190 }, { "epoch": 0.9379818054131721, "grad_norm": 5.470244407653809, "learning_rate": 1.2413269468429685e-06, "loss": 2.005794906616211, "step": 116200 }, { "epoch": 0.9380625267389392, "grad_norm": 4.511528015136719, "learning_rate": 1.2397114771047553e-06, "loss": 2.160118293762207, "step": 116210 }, { "epoch": 0.9381432480647062, "grad_norm": 2.2121877670288086, "learning_rate": 1.2380960073665421e-06, "loss": 2.194301986694336, "step": 116220 }, { "epoch": 0.9382239693904733, "grad_norm": 4.5357584953308105, "learning_rate": 1.236480537628329e-06, "loss": 1.9795089721679688, "step": 116230 }, { "epoch": 0.9383046907162403, "grad_norm": 5.070855140686035, "learning_rate": 1.2348650678901158e-06, "loss": 2.111911582946777, "step": 116240 }, { "epoch": 0.9383854120420074, "grad_norm": 4.29203987121582, "learning_rate": 1.2332495981519027e-06, "loss": 2.1212539672851562, "step": 116250 }, { "epoch": 0.9384661333677744, "grad_norm": 2.8743693828582764, "learning_rate": 1.2316341284136895e-06, "loss": 2.320107078552246, "step": 116260 }, { "epoch": 0.9385468546935415, "grad_norm": 6.710045337677002, "learning_rate": 1.2300186586754764e-06, "loss": 2.5120140075683595, "step": 116270 }, { "epoch": 0.9386275760193086, "grad_norm": 4.433785438537598, "learning_rate": 1.2284031889372632e-06, "loss": 2.351298713684082, "step": 116280 }, { "epoch": 0.9387082973450755, "grad_norm": 3.621819019317627, "learning_rate": 1.22678771919905e-06, "loss": 1.9583568572998047, "step": 116290 }, { "epoch": 0.9387890186708426, "grad_norm": 2.6276276111602783, "learning_rate": 1.225172249460837e-06, "loss": 3.06822509765625, "step": 116300 }, { "epoch": 0.9388697399966097, "grad_norm": 2.369913101196289, "learning_rate": 1.223556779722624e-06, "loss": 2.167134094238281, "step": 116310 }, { "epoch": 0.9389504613223768, "grad_norm": 3.114363193511963, "learning_rate": 1.2219413099844108e-06, "loss": 2.2191816329956056, "step": 116320 }, { "epoch": 0.9390311826481438, "grad_norm": 4.73622989654541, "learning_rate": 1.2203258402461977e-06, "loss": 3.072755241394043, "step": 116330 }, { "epoch": 0.9391119039739109, "grad_norm": 4.336769104003906, "learning_rate": 1.2187103705079845e-06, "loss": 2.5399663925170897, "step": 116340 }, { "epoch": 0.939192625299678, "grad_norm": 2.7133495807647705, "learning_rate": 1.2170949007697716e-06, "loss": 2.120311164855957, "step": 116350 }, { "epoch": 0.9392733466254449, "grad_norm": 2.8437387943267822, "learning_rate": 1.2154794310315584e-06, "loss": 2.882999610900879, "step": 116360 }, { "epoch": 0.939354067951212, "grad_norm": 4.9622578620910645, "learning_rate": 1.2138639612933453e-06, "loss": 2.3489957809448243, "step": 116370 }, { "epoch": 0.9394347892769791, "grad_norm": 3.2522575855255127, "learning_rate": 1.212248491555132e-06, "loss": 2.037038230895996, "step": 116380 }, { "epoch": 0.9395155106027462, "grad_norm": 2.954084634780884, "learning_rate": 1.210633021816919e-06, "loss": 2.468527603149414, "step": 116390 }, { "epoch": 0.9395962319285132, "grad_norm": 4.052637577056885, "learning_rate": 1.2090175520787058e-06, "loss": 2.1183324813842774, "step": 116400 }, { "epoch": 0.9396769532542802, "grad_norm": 4.658243656158447, "learning_rate": 1.2074020823404926e-06, "loss": 2.57403450012207, "step": 116410 }, { "epoch": 0.9397576745800473, "grad_norm": 2.410698652267456, "learning_rate": 1.2057866126022795e-06, "loss": 2.6369159698486326, "step": 116420 }, { "epoch": 0.9398383959058143, "grad_norm": 2.6444244384765625, "learning_rate": 1.2041711428640663e-06, "loss": 2.089219093322754, "step": 116430 }, { "epoch": 0.9399191172315814, "grad_norm": 2.8617098331451416, "learning_rate": 1.2025556731258532e-06, "loss": 2.409651184082031, "step": 116440 }, { "epoch": 0.9399998385573485, "grad_norm": 5.122492790222168, "learning_rate": 1.20094020338764e-06, "loss": 2.077408027648926, "step": 116450 }, { "epoch": 0.9400805598831156, "grad_norm": 2.961793899536133, "learning_rate": 1.1993247336494269e-06, "loss": 2.4335763931274412, "step": 116460 }, { "epoch": 0.9401612812088825, "grad_norm": 4.103317737579346, "learning_rate": 1.197709263911214e-06, "loss": 2.3171413421630858, "step": 116470 }, { "epoch": 0.9402420025346496, "grad_norm": 4.58411979675293, "learning_rate": 1.1960937941730008e-06, "loss": 2.620765495300293, "step": 116480 }, { "epoch": 0.9403227238604167, "grad_norm": 2.1670734882354736, "learning_rate": 1.1944783244347876e-06, "loss": 2.480008125305176, "step": 116490 }, { "epoch": 0.9404034451861837, "grad_norm": 5.105350494384766, "learning_rate": 1.1928628546965745e-06, "loss": 2.574336814880371, "step": 116500 }, { "epoch": 0.9404841665119508, "grad_norm": 3.6317427158355713, "learning_rate": 1.1912473849583613e-06, "loss": 2.444900894165039, "step": 116510 }, { "epoch": 0.9405648878377179, "grad_norm": 3.3672211170196533, "learning_rate": 1.1896319152201482e-06, "loss": 2.409882736206055, "step": 116520 }, { "epoch": 0.9406456091634849, "grad_norm": 2.77175235748291, "learning_rate": 1.1880164454819352e-06, "loss": 2.2801355361938476, "step": 116530 }, { "epoch": 0.9407263304892519, "grad_norm": 3.6729490756988525, "learning_rate": 1.186400975743722e-06, "loss": 1.8266124725341797, "step": 116540 }, { "epoch": 0.940807051815019, "grad_norm": 3.984518527984619, "learning_rate": 1.184785506005509e-06, "loss": 2.660547637939453, "step": 116550 }, { "epoch": 0.9408877731407861, "grad_norm": 5.247560501098633, "learning_rate": 1.1831700362672957e-06, "loss": 3.032420539855957, "step": 116560 }, { "epoch": 0.9409684944665532, "grad_norm": 3.403871774673462, "learning_rate": 1.1815545665290826e-06, "loss": 2.1323968887329103, "step": 116570 }, { "epoch": 0.9410492157923201, "grad_norm": 3.9495797157287598, "learning_rate": 1.1799390967908694e-06, "loss": 2.270454978942871, "step": 116580 }, { "epoch": 0.9411299371180872, "grad_norm": 4.024869441986084, "learning_rate": 1.1783236270526563e-06, "loss": 2.148244094848633, "step": 116590 }, { "epoch": 0.9412106584438543, "grad_norm": 3.491806745529175, "learning_rate": 1.1767081573144431e-06, "loss": 2.0565469741821287, "step": 116600 }, { "epoch": 0.9412913797696213, "grad_norm": 2.2113723754882812, "learning_rate": 1.17509268757623e-06, "loss": 2.4669979095458983, "step": 116610 }, { "epoch": 0.9413721010953884, "grad_norm": 2.167621612548828, "learning_rate": 1.1734772178380168e-06, "loss": 2.624001121520996, "step": 116620 }, { "epoch": 0.9414528224211555, "grad_norm": 5.543852806091309, "learning_rate": 1.1718617480998039e-06, "loss": 2.799495506286621, "step": 116630 }, { "epoch": 0.9415335437469226, "grad_norm": 5.152539253234863, "learning_rate": 1.1702462783615907e-06, "loss": 2.538361740112305, "step": 116640 }, { "epoch": 0.9416142650726895, "grad_norm": 3.9881527423858643, "learning_rate": 1.1686308086233776e-06, "loss": 2.231112480163574, "step": 116650 }, { "epoch": 0.9416949863984566, "grad_norm": 2.3148090839385986, "learning_rate": 1.1670153388851644e-06, "loss": 2.574548530578613, "step": 116660 }, { "epoch": 0.9417757077242237, "grad_norm": 2.1911871433258057, "learning_rate": 1.1653998691469513e-06, "loss": 2.644456481933594, "step": 116670 }, { "epoch": 0.9418564290499907, "grad_norm": 5.914895057678223, "learning_rate": 1.1637843994087381e-06, "loss": 2.791855239868164, "step": 116680 }, { "epoch": 0.9419371503757578, "grad_norm": 4.366228103637695, "learning_rate": 1.162168929670525e-06, "loss": 2.7455617904663088, "step": 116690 }, { "epoch": 0.9420178717015248, "grad_norm": 2.852989435195923, "learning_rate": 1.160553459932312e-06, "loss": 2.211967658996582, "step": 116700 }, { "epoch": 0.9420985930272919, "grad_norm": 5.673786163330078, "learning_rate": 1.1589379901940989e-06, "loss": 2.0245521545410154, "step": 116710 }, { "epoch": 0.9421793143530589, "grad_norm": 4.606922626495361, "learning_rate": 1.1573225204558857e-06, "loss": 2.1428504943847657, "step": 116720 }, { "epoch": 0.942260035678826, "grad_norm": 3.88425874710083, "learning_rate": 1.1557070507176725e-06, "loss": 1.8557485580444335, "step": 116730 }, { "epoch": 0.9423407570045931, "grad_norm": 3.403160810470581, "learning_rate": 1.1540915809794594e-06, "loss": 2.3130884170532227, "step": 116740 }, { "epoch": 0.94242147833036, "grad_norm": 3.3034117221832275, "learning_rate": 1.1524761112412462e-06, "loss": 2.6159244537353517, "step": 116750 }, { "epoch": 0.9425021996561271, "grad_norm": 5.987224102020264, "learning_rate": 1.150860641503033e-06, "loss": 2.441018295288086, "step": 116760 }, { "epoch": 0.9425829209818942, "grad_norm": 2.5941615104675293, "learning_rate": 1.14924517176482e-06, "loss": 2.5063304901123047, "step": 116770 }, { "epoch": 0.9426636423076613, "grad_norm": 5.678422451019287, "learning_rate": 1.1476297020266068e-06, "loss": 2.5580549240112305, "step": 116780 }, { "epoch": 0.9427443636334283, "grad_norm": 4.869787216186523, "learning_rate": 1.1460142322883938e-06, "loss": 1.8246120452880858, "step": 116790 }, { "epoch": 0.9428250849591954, "grad_norm": 4.750952243804932, "learning_rate": 1.1443987625501807e-06, "loss": 2.3034297943115236, "step": 116800 }, { "epoch": 0.9429058062849625, "grad_norm": 4.790670394897461, "learning_rate": 1.1427832928119675e-06, "loss": 1.9676458358764648, "step": 116810 }, { "epoch": 0.9429865276107294, "grad_norm": 2.778698682785034, "learning_rate": 1.1411678230737544e-06, "loss": 2.1544361114501953, "step": 116820 }, { "epoch": 0.9430672489364965, "grad_norm": 2.653798818588257, "learning_rate": 1.1395523533355412e-06, "loss": 2.1928300857543945, "step": 116830 }, { "epoch": 0.9431479702622636, "grad_norm": 6.51083517074585, "learning_rate": 1.137936883597328e-06, "loss": 2.758323860168457, "step": 116840 }, { "epoch": 0.9432286915880307, "grad_norm": 2.5646257400512695, "learning_rate": 1.136321413859115e-06, "loss": 2.383561897277832, "step": 116850 }, { "epoch": 0.9433094129137977, "grad_norm": 2.815901756286621, "learning_rate": 1.1347059441209018e-06, "loss": 2.042521095275879, "step": 116860 }, { "epoch": 0.9433901342395647, "grad_norm": 5.080765247344971, "learning_rate": 1.1330904743826888e-06, "loss": 2.0142370223999024, "step": 116870 }, { "epoch": 0.9434708555653318, "grad_norm": 2.9356822967529297, "learning_rate": 1.1314750046444757e-06, "loss": 2.1439077377319338, "step": 116880 }, { "epoch": 0.9435515768910988, "grad_norm": 3.1612343788146973, "learning_rate": 1.1298595349062625e-06, "loss": 2.2293949127197266, "step": 116890 }, { "epoch": 0.9436322982168659, "grad_norm": 4.91493558883667, "learning_rate": 1.1282440651680494e-06, "loss": 2.4349620819091795, "step": 116900 }, { "epoch": 0.943713019542633, "grad_norm": 1.876494288444519, "learning_rate": 1.1266285954298362e-06, "loss": 2.258712387084961, "step": 116910 }, { "epoch": 0.9437937408684001, "grad_norm": 4.860997200012207, "learning_rate": 1.125013125691623e-06, "loss": 2.2170347213745116, "step": 116920 }, { "epoch": 0.943874462194167, "grad_norm": 2.9222512245178223, "learning_rate": 1.1233976559534099e-06, "loss": 2.3033573150634767, "step": 116930 }, { "epoch": 0.9439551835199341, "grad_norm": 3.2903971672058105, "learning_rate": 1.121782186215197e-06, "loss": 2.2974981307983398, "step": 116940 }, { "epoch": 0.9440359048457012, "grad_norm": 5.159081935882568, "learning_rate": 1.1201667164769838e-06, "loss": 2.4823888778686523, "step": 116950 }, { "epoch": 0.9441166261714682, "grad_norm": 4.865130424499512, "learning_rate": 1.1185512467387706e-06, "loss": 2.3049707412719727, "step": 116960 }, { "epoch": 0.9441973474972353, "grad_norm": 4.375406742095947, "learning_rate": 1.1169357770005575e-06, "loss": 2.162112998962402, "step": 116970 }, { "epoch": 0.9442780688230024, "grad_norm": 2.785947799682617, "learning_rate": 1.1153203072623443e-06, "loss": 2.286681365966797, "step": 116980 }, { "epoch": 0.9443587901487694, "grad_norm": 2.876377820968628, "learning_rate": 1.1137048375241312e-06, "loss": 2.5684768676757814, "step": 116990 }, { "epoch": 0.9444395114745364, "grad_norm": 4.718586444854736, "learning_rate": 1.112089367785918e-06, "loss": 2.715117263793945, "step": 117000 }, { "epoch": 0.9445202328003035, "grad_norm": 2.6417837142944336, "learning_rate": 1.1104738980477049e-06, "loss": 1.947165298461914, "step": 117010 }, { "epoch": 0.9446009541260706, "grad_norm": 4.827341556549072, "learning_rate": 1.1088584283094917e-06, "loss": 2.2761157989501952, "step": 117020 }, { "epoch": 0.9446816754518376, "grad_norm": 2.15794038772583, "learning_rate": 1.1072429585712786e-06, "loss": 2.3671342849731447, "step": 117030 }, { "epoch": 0.9447623967776047, "grad_norm": 3.8139595985412598, "learning_rate": 1.1056274888330654e-06, "loss": 2.3028446197509767, "step": 117040 }, { "epoch": 0.9448431181033717, "grad_norm": 3.1312196254730225, "learning_rate": 1.1040120190948525e-06, "loss": 2.5249088287353514, "step": 117050 }, { "epoch": 0.9449238394291388, "grad_norm": 3.0106041431427, "learning_rate": 1.1023965493566393e-06, "loss": 2.2841171264648437, "step": 117060 }, { "epoch": 0.9450045607549058, "grad_norm": 2.884699821472168, "learning_rate": 1.1007810796184262e-06, "loss": 2.38138427734375, "step": 117070 }, { "epoch": 0.9450852820806729, "grad_norm": 3.50240159034729, "learning_rate": 1.099165609880213e-06, "loss": 2.8051124572753907, "step": 117080 }, { "epoch": 0.94516600340644, "grad_norm": 5.951382637023926, "learning_rate": 1.0975501401419998e-06, "loss": 2.2750282287597656, "step": 117090 }, { "epoch": 0.945246724732207, "grad_norm": 3.6478374004364014, "learning_rate": 1.095934670403787e-06, "loss": 2.124669075012207, "step": 117100 }, { "epoch": 0.945327446057974, "grad_norm": 3.099210023880005, "learning_rate": 1.0943192006655737e-06, "loss": 2.1227184295654298, "step": 117110 }, { "epoch": 0.9454081673837411, "grad_norm": 7.762210369110107, "learning_rate": 1.0927037309273606e-06, "loss": 2.704949378967285, "step": 117120 }, { "epoch": 0.9454888887095082, "grad_norm": 3.9680726528167725, "learning_rate": 1.0910882611891474e-06, "loss": 2.250886344909668, "step": 117130 }, { "epoch": 0.9455696100352752, "grad_norm": 2.6290030479431152, "learning_rate": 1.0894727914509343e-06, "loss": 2.186090087890625, "step": 117140 }, { "epoch": 0.9456503313610423, "grad_norm": 2.8193070888519287, "learning_rate": 1.0878573217127211e-06, "loss": 2.3823923110961913, "step": 117150 }, { "epoch": 0.9457310526868093, "grad_norm": 5.401296615600586, "learning_rate": 1.086241851974508e-06, "loss": 2.2643388748168944, "step": 117160 }, { "epoch": 0.9458117740125764, "grad_norm": 3.1845779418945312, "learning_rate": 1.0846263822362948e-06, "loss": 2.1544918060302733, "step": 117170 }, { "epoch": 0.9458924953383434, "grad_norm": 3.9141149520874023, "learning_rate": 1.0830109124980817e-06, "loss": 2.669502830505371, "step": 117180 }, { "epoch": 0.9459732166641105, "grad_norm": 3.705472469329834, "learning_rate": 1.0813954427598685e-06, "loss": 2.607246971130371, "step": 117190 }, { "epoch": 0.9460539379898776, "grad_norm": 4.939969539642334, "learning_rate": 1.0797799730216554e-06, "loss": 2.5241296768188475, "step": 117200 }, { "epoch": 0.9461346593156446, "grad_norm": 3.253103733062744, "learning_rate": 1.0781645032834422e-06, "loss": 2.2321313858032226, "step": 117210 }, { "epoch": 0.9462153806414116, "grad_norm": 5.479300498962402, "learning_rate": 1.0765490335452293e-06, "loss": 2.408058929443359, "step": 117220 }, { "epoch": 0.9462961019671787, "grad_norm": 2.6458740234375, "learning_rate": 1.074933563807016e-06, "loss": 1.916290283203125, "step": 117230 }, { "epoch": 0.9463768232929458, "grad_norm": 2.4173030853271484, "learning_rate": 1.073318094068803e-06, "loss": 2.554597282409668, "step": 117240 }, { "epoch": 0.9464575446187128, "grad_norm": 4.352552890777588, "learning_rate": 1.0717026243305898e-06, "loss": 2.1222286224365234, "step": 117250 }, { "epoch": 0.9465382659444799, "grad_norm": 3.5899016857147217, "learning_rate": 1.0700871545923766e-06, "loss": 1.8909278869628907, "step": 117260 }, { "epoch": 0.946618987270247, "grad_norm": 4.910401344299316, "learning_rate": 1.0684716848541637e-06, "loss": 2.6156177520751953, "step": 117270 }, { "epoch": 0.9466997085960139, "grad_norm": 2.5324325561523438, "learning_rate": 1.0668562151159505e-06, "loss": 2.2170345306396486, "step": 117280 }, { "epoch": 0.946780429921781, "grad_norm": 1.7836909294128418, "learning_rate": 1.0652407453777374e-06, "loss": 2.0906070709228515, "step": 117290 }, { "epoch": 0.9468611512475481, "grad_norm": 3.3904831409454346, "learning_rate": 1.0636252756395242e-06, "loss": 2.355894660949707, "step": 117300 }, { "epoch": 0.9469418725733152, "grad_norm": 9.095498085021973, "learning_rate": 1.062009805901311e-06, "loss": 2.5594467163085937, "step": 117310 }, { "epoch": 0.9470225938990822, "grad_norm": 4.798799514770508, "learning_rate": 1.060394336163098e-06, "loss": 2.361113166809082, "step": 117320 }, { "epoch": 0.9471033152248493, "grad_norm": 4.225283145904541, "learning_rate": 1.0587788664248848e-06, "loss": 2.064060020446777, "step": 117330 }, { "epoch": 0.9471840365506163, "grad_norm": 4.020675182342529, "learning_rate": 1.0571633966866716e-06, "loss": 2.5926456451416016, "step": 117340 }, { "epoch": 0.9472647578763833, "grad_norm": 5.162586688995361, "learning_rate": 1.0555479269484585e-06, "loss": 2.6198806762695312, "step": 117350 }, { "epoch": 0.9473454792021504, "grad_norm": 3.5278425216674805, "learning_rate": 1.0539324572102453e-06, "loss": 2.392536163330078, "step": 117360 }, { "epoch": 0.9474262005279175, "grad_norm": 4.7026824951171875, "learning_rate": 1.0523169874720322e-06, "loss": 2.334041213989258, "step": 117370 }, { "epoch": 0.9475069218536846, "grad_norm": 3.3112220764160156, "learning_rate": 1.050701517733819e-06, "loss": 2.488105583190918, "step": 117380 }, { "epoch": 0.9475876431794515, "grad_norm": 3.315009117126465, "learning_rate": 1.049086047995606e-06, "loss": 2.5285694122314455, "step": 117390 }, { "epoch": 0.9476683645052186, "grad_norm": 3.658463478088379, "learning_rate": 1.047470578257393e-06, "loss": 1.905293846130371, "step": 117400 }, { "epoch": 0.9477490858309857, "grad_norm": 4.038325786590576, "learning_rate": 1.0458551085191798e-06, "loss": 2.6127975463867186, "step": 117410 }, { "epoch": 0.9478298071567527, "grad_norm": 4.330641269683838, "learning_rate": 1.0442396387809666e-06, "loss": 2.0123857498168944, "step": 117420 }, { "epoch": 0.9479105284825198, "grad_norm": 3.819023609161377, "learning_rate": 1.0426241690427534e-06, "loss": 2.855343055725098, "step": 117430 }, { "epoch": 0.9479912498082869, "grad_norm": 2.365978479385376, "learning_rate": 1.0410086993045405e-06, "loss": 2.357827377319336, "step": 117440 }, { "epoch": 0.948071971134054, "grad_norm": 3.8711862564086914, "learning_rate": 1.0393932295663273e-06, "loss": 2.1659835815429687, "step": 117450 }, { "epoch": 0.9481526924598209, "grad_norm": 3.4045536518096924, "learning_rate": 1.0377777598281142e-06, "loss": 1.840669822692871, "step": 117460 }, { "epoch": 0.948233413785588, "grad_norm": 5.123941898345947, "learning_rate": 1.036162290089901e-06, "loss": 2.132535934448242, "step": 117470 }, { "epoch": 0.9483141351113551, "grad_norm": 3.2184696197509766, "learning_rate": 1.0345468203516879e-06, "loss": 2.8976341247558595, "step": 117480 }, { "epoch": 0.9483948564371221, "grad_norm": 3.894068717956543, "learning_rate": 1.0329313506134747e-06, "loss": 2.47847843170166, "step": 117490 }, { "epoch": 0.9484755777628892, "grad_norm": 4.6727423667907715, "learning_rate": 1.0313158808752616e-06, "loss": 2.393410873413086, "step": 117500 }, { "epoch": 0.9485562990886562, "grad_norm": 3.1399173736572266, "learning_rate": 1.0297004111370484e-06, "loss": 2.4739896774291994, "step": 117510 }, { "epoch": 0.9486370204144233, "grad_norm": 3.1576220989227295, "learning_rate": 1.0280849413988353e-06, "loss": 2.1856155395507812, "step": 117520 }, { "epoch": 0.9487177417401903, "grad_norm": 3.483785629272461, "learning_rate": 1.0264694716606221e-06, "loss": 2.3242034912109375, "step": 117530 }, { "epoch": 0.9487984630659574, "grad_norm": 4.756810188293457, "learning_rate": 1.024854001922409e-06, "loss": 2.4095773696899414, "step": 117540 }, { "epoch": 0.9488791843917245, "grad_norm": 3.66845965385437, "learning_rate": 1.0232385321841958e-06, "loss": 2.928618812561035, "step": 117550 }, { "epoch": 0.9489599057174916, "grad_norm": 3.348344326019287, "learning_rate": 1.0216230624459826e-06, "loss": 2.419312286376953, "step": 117560 }, { "epoch": 0.9490406270432585, "grad_norm": 4.3841552734375, "learning_rate": 1.0200075927077697e-06, "loss": 2.4567129135131838, "step": 117570 }, { "epoch": 0.9491213483690256, "grad_norm": 3.653858184814453, "learning_rate": 1.0183921229695566e-06, "loss": 2.409186363220215, "step": 117580 }, { "epoch": 0.9492020696947927, "grad_norm": 4.982837677001953, "learning_rate": 1.0167766532313434e-06, "loss": 2.371246337890625, "step": 117590 }, { "epoch": 0.9492827910205597, "grad_norm": 3.5593206882476807, "learning_rate": 1.0151611834931302e-06, "loss": 2.8795598983764648, "step": 117600 }, { "epoch": 0.9493635123463268, "grad_norm": 3.710685968399048, "learning_rate": 1.0135457137549173e-06, "loss": 2.40784912109375, "step": 117610 }, { "epoch": 0.9494442336720939, "grad_norm": 4.140685558319092, "learning_rate": 1.0119302440167041e-06, "loss": 2.028053855895996, "step": 117620 }, { "epoch": 0.9495249549978609, "grad_norm": 2.664106845855713, "learning_rate": 1.010314774278491e-06, "loss": 2.4104381561279298, "step": 117630 }, { "epoch": 0.9496056763236279, "grad_norm": 5.1110920906066895, "learning_rate": 1.0086993045402778e-06, "loss": 2.277633285522461, "step": 117640 }, { "epoch": 0.949686397649395, "grad_norm": 3.481027364730835, "learning_rate": 1.0070838348020647e-06, "loss": 2.4776046752929686, "step": 117650 }, { "epoch": 0.9497671189751621, "grad_norm": 2.985530376434326, "learning_rate": 1.0054683650638515e-06, "loss": 2.309804916381836, "step": 117660 }, { "epoch": 0.9498478403009291, "grad_norm": 4.73109245300293, "learning_rate": 1.0038528953256384e-06, "loss": 2.6257999420166014, "step": 117670 }, { "epoch": 0.9499285616266961, "grad_norm": 3.587385654449463, "learning_rate": 1.0022374255874252e-06, "loss": 2.352667236328125, "step": 117680 }, { "epoch": 0.9500092829524632, "grad_norm": 3.9676549434661865, "learning_rate": 1.000621955849212e-06, "loss": 2.2933652877807615, "step": 117690 }, { "epoch": 0.9500900042782303, "grad_norm": 3.4390571117401123, "learning_rate": 9.99006486110999e-07, "loss": 2.2241430282592773, "step": 117700 }, { "epoch": 0.9501707256039973, "grad_norm": 3.0804057121276855, "learning_rate": 9.973910163727858e-07, "loss": 1.974250602722168, "step": 117710 }, { "epoch": 0.9502514469297644, "grad_norm": 3.824368953704834, "learning_rate": 9.957755466345726e-07, "loss": 1.9644048690795899, "step": 117720 }, { "epoch": 0.9503321682555315, "grad_norm": 1.7979494333267212, "learning_rate": 9.941600768963597e-07, "loss": 1.9463146209716797, "step": 117730 }, { "epoch": 0.9504128895812984, "grad_norm": 3.786574125289917, "learning_rate": 9.925446071581465e-07, "loss": 2.3409175872802734, "step": 117740 }, { "epoch": 0.9504936109070655, "grad_norm": 4.156895160675049, "learning_rate": 9.909291374199334e-07, "loss": 2.4032955169677734, "step": 117750 }, { "epoch": 0.9505743322328326, "grad_norm": 8.99160385131836, "learning_rate": 9.893136676817202e-07, "loss": 2.8633899688720703, "step": 117760 }, { "epoch": 0.9506550535585997, "grad_norm": 2.600470542907715, "learning_rate": 9.87698197943507e-07, "loss": 2.1580789566040037, "step": 117770 }, { "epoch": 0.9507357748843667, "grad_norm": 2.4447107315063477, "learning_rate": 9.860827282052939e-07, "loss": 2.5134119033813476, "step": 117780 }, { "epoch": 0.9508164962101338, "grad_norm": 3.0727274417877197, "learning_rate": 9.84467258467081e-07, "loss": 2.5907468795776367, "step": 117790 }, { "epoch": 0.9508972175359008, "grad_norm": 5.401022911071777, "learning_rate": 9.828517887288678e-07, "loss": 2.3857675552368165, "step": 117800 }, { "epoch": 0.9509779388616678, "grad_norm": 5.389813423156738, "learning_rate": 9.812363189906546e-07, "loss": 2.1132556915283205, "step": 117810 }, { "epoch": 0.9510586601874349, "grad_norm": 5.045388698577881, "learning_rate": 9.796208492524415e-07, "loss": 2.1916263580322264, "step": 117820 }, { "epoch": 0.951139381513202, "grad_norm": 4.345901966094971, "learning_rate": 9.780053795142283e-07, "loss": 2.2555402755737304, "step": 117830 }, { "epoch": 0.9512201028389691, "grad_norm": 3.098520040512085, "learning_rate": 9.763899097760152e-07, "loss": 2.107094383239746, "step": 117840 }, { "epoch": 0.951300824164736, "grad_norm": 3.0183115005493164, "learning_rate": 9.74774440037802e-07, "loss": 2.350001335144043, "step": 117850 }, { "epoch": 0.9513815454905031, "grad_norm": 3.608729362487793, "learning_rate": 9.731589702995889e-07, "loss": 2.674156379699707, "step": 117860 }, { "epoch": 0.9514622668162702, "grad_norm": 4.739294528961182, "learning_rate": 9.715435005613757e-07, "loss": 2.5112361907958984, "step": 117870 }, { "epoch": 0.9515429881420372, "grad_norm": 3.111814260482788, "learning_rate": 9.699280308231628e-07, "loss": 2.3676689147949217, "step": 117880 }, { "epoch": 0.9516237094678043, "grad_norm": 2.8575973510742188, "learning_rate": 9.683125610849496e-07, "loss": 2.138043975830078, "step": 117890 }, { "epoch": 0.9517044307935714, "grad_norm": 3.4155502319335938, "learning_rate": 9.666970913467365e-07, "loss": 2.127114486694336, "step": 117900 }, { "epoch": 0.9517851521193385, "grad_norm": 2.5872750282287598, "learning_rate": 9.650816216085233e-07, "loss": 2.3248622894287108, "step": 117910 }, { "epoch": 0.9518658734451054, "grad_norm": 1.953909158706665, "learning_rate": 9.634661518703102e-07, "loss": 2.054470443725586, "step": 117920 }, { "epoch": 0.9519465947708725, "grad_norm": 2.782914161682129, "learning_rate": 9.61850682132097e-07, "loss": 2.3515275955200194, "step": 117930 }, { "epoch": 0.9520273160966396, "grad_norm": 9.158121109008789, "learning_rate": 9.602352123938838e-07, "loss": 2.8833009719848635, "step": 117940 }, { "epoch": 0.9521080374224066, "grad_norm": 4.176840305328369, "learning_rate": 9.586197426556707e-07, "loss": 2.1404342651367188, "step": 117950 }, { "epoch": 0.9521887587481737, "grad_norm": 12.051448822021484, "learning_rate": 9.570042729174577e-07, "loss": 2.8533842086791994, "step": 117960 }, { "epoch": 0.9522694800739407, "grad_norm": 3.24454927444458, "learning_rate": 9.553888031792446e-07, "loss": 2.3316844940185546, "step": 117970 }, { "epoch": 0.9523502013997078, "grad_norm": 4.828128814697266, "learning_rate": 9.537733334410314e-07, "loss": 2.0030059814453125, "step": 117980 }, { "epoch": 0.9524309227254748, "grad_norm": 3.895101547241211, "learning_rate": 9.521578637028183e-07, "loss": 2.5895843505859375, "step": 117990 }, { "epoch": 0.9525116440512419, "grad_norm": 3.782580614089966, "learning_rate": 9.505423939646051e-07, "loss": 2.6206876754760744, "step": 118000 }, { "epoch": 0.952592365377009, "grad_norm": 4.0007853507995605, "learning_rate": 9.48926924226392e-07, "loss": 2.580272674560547, "step": 118010 }, { "epoch": 0.9526730867027761, "grad_norm": 4.227759838104248, "learning_rate": 9.473114544881788e-07, "loss": 2.4035139083862305, "step": 118020 }, { "epoch": 0.952753808028543, "grad_norm": 3.3736395835876465, "learning_rate": 9.456959847499657e-07, "loss": 2.5742029190063476, "step": 118030 }, { "epoch": 0.9528345293543101, "grad_norm": 4.284529685974121, "learning_rate": 9.440805150117527e-07, "loss": 2.1573591232299805, "step": 118040 }, { "epoch": 0.9529152506800772, "grad_norm": 3.9584450721740723, "learning_rate": 9.424650452735396e-07, "loss": 2.1543182373046874, "step": 118050 }, { "epoch": 0.9529959720058442, "grad_norm": 2.3129496574401855, "learning_rate": 9.408495755353264e-07, "loss": 2.4356094360351563, "step": 118060 }, { "epoch": 0.9530766933316113, "grad_norm": 2.6050899028778076, "learning_rate": 9.392341057971133e-07, "loss": 2.3274221420288086, "step": 118070 }, { "epoch": 0.9531574146573784, "grad_norm": 3.9085445404052734, "learning_rate": 9.376186360589001e-07, "loss": 2.345301628112793, "step": 118080 }, { "epoch": 0.9532381359831454, "grad_norm": 2.431626796722412, "learning_rate": 9.36003166320687e-07, "loss": 2.169120979309082, "step": 118090 }, { "epoch": 0.9533188573089124, "grad_norm": 5.765065670013428, "learning_rate": 9.343876965824739e-07, "loss": 2.4071836471557617, "step": 118100 }, { "epoch": 0.9533995786346795, "grad_norm": 3.0796918869018555, "learning_rate": 9.327722268442607e-07, "loss": 2.345168685913086, "step": 118110 }, { "epoch": 0.9534802999604466, "grad_norm": 7.1226887702941895, "learning_rate": 9.311567571060476e-07, "loss": 1.711054229736328, "step": 118120 }, { "epoch": 0.9535610212862136, "grad_norm": 3.840528726577759, "learning_rate": 9.295412873678344e-07, "loss": 2.2057397842407225, "step": 118130 }, { "epoch": 0.9536417426119806, "grad_norm": 5.761154651641846, "learning_rate": 9.279258176296213e-07, "loss": 2.6005117416381838, "step": 118140 }, { "epoch": 0.9537224639377477, "grad_norm": 4.995433330535889, "learning_rate": 9.263103478914081e-07, "loss": 2.4931865692138673, "step": 118150 }, { "epoch": 0.9538031852635148, "grad_norm": 5.026123523712158, "learning_rate": 9.24694878153195e-07, "loss": 2.618178939819336, "step": 118160 }, { "epoch": 0.9538839065892818, "grad_norm": 5.0092387199401855, "learning_rate": 9.230794084149819e-07, "loss": 2.5482473373413086, "step": 118170 }, { "epoch": 0.9539646279150489, "grad_norm": 2.175990343093872, "learning_rate": 9.214639386767688e-07, "loss": 2.158620262145996, "step": 118180 }, { "epoch": 0.954045349240816, "grad_norm": 3.637101650238037, "learning_rate": 9.198484689385556e-07, "loss": 2.336282157897949, "step": 118190 }, { "epoch": 0.9541260705665829, "grad_norm": 4.244119644165039, "learning_rate": 9.182329992003426e-07, "loss": 2.7363775253295897, "step": 118200 }, { "epoch": 0.95420679189235, "grad_norm": 3.4634854793548584, "learning_rate": 9.166175294621295e-07, "loss": 2.4653045654296877, "step": 118210 }, { "epoch": 0.9542875132181171, "grad_norm": 3.266934394836426, "learning_rate": 9.150020597239164e-07, "loss": 2.5904367446899412, "step": 118220 }, { "epoch": 0.9543682345438842, "grad_norm": 3.296165943145752, "learning_rate": 9.133865899857032e-07, "loss": 2.212140655517578, "step": 118230 }, { "epoch": 0.9544489558696512, "grad_norm": 3.875214099884033, "learning_rate": 9.117711202474901e-07, "loss": 1.961985206604004, "step": 118240 }, { "epoch": 0.9545296771954183, "grad_norm": 3.996673345565796, "learning_rate": 9.101556505092769e-07, "loss": 2.2217493057250977, "step": 118250 }, { "epoch": 0.9546103985211853, "grad_norm": 3.579277753829956, "learning_rate": 9.085401807710638e-07, "loss": 2.3622838973999025, "step": 118260 }, { "epoch": 0.9546911198469523, "grad_norm": 6.370048999786377, "learning_rate": 9.069247110328507e-07, "loss": 2.0302007675170897, "step": 118270 }, { "epoch": 0.9547718411727194, "grad_norm": 3.7481601238250732, "learning_rate": 9.053092412946375e-07, "loss": 2.577177810668945, "step": 118280 }, { "epoch": 0.9548525624984865, "grad_norm": 5.010561943054199, "learning_rate": 9.036937715564244e-07, "loss": 2.470495414733887, "step": 118290 }, { "epoch": 0.9549332838242536, "grad_norm": 5.4599456787109375, "learning_rate": 9.020783018182112e-07, "loss": 2.4139785766601562, "step": 118300 }, { "epoch": 0.9550140051500206, "grad_norm": 2.7933437824249268, "learning_rate": 9.004628320799981e-07, "loss": 2.3529521942138674, "step": 118310 }, { "epoch": 0.9550947264757876, "grad_norm": 4.198150157928467, "learning_rate": 8.988473623417849e-07, "loss": 2.498872756958008, "step": 118320 }, { "epoch": 0.9551754478015547, "grad_norm": 3.5807199478149414, "learning_rate": 8.972318926035718e-07, "loss": 2.4114124298095705, "step": 118330 }, { "epoch": 0.9552561691273217, "grad_norm": 3.615164279937744, "learning_rate": 8.956164228653587e-07, "loss": 2.452546501159668, "step": 118340 }, { "epoch": 0.9553368904530888, "grad_norm": 4.9072136878967285, "learning_rate": 8.940009531271457e-07, "loss": 2.112765884399414, "step": 118350 }, { "epoch": 0.9554176117788559, "grad_norm": 5.74073600769043, "learning_rate": 8.923854833889325e-07, "loss": 2.3688819885253904, "step": 118360 }, { "epoch": 0.955498333104623, "grad_norm": 3.3077166080474854, "learning_rate": 8.907700136507194e-07, "loss": 2.3972141265869142, "step": 118370 }, { "epoch": 0.9555790544303899, "grad_norm": 3.758368968963623, "learning_rate": 8.891545439125063e-07, "loss": 1.8995529174804688, "step": 118380 }, { "epoch": 0.955659775756157, "grad_norm": 3.2564547061920166, "learning_rate": 8.875390741742932e-07, "loss": 2.5041685104370117, "step": 118390 }, { "epoch": 0.9557404970819241, "grad_norm": 3.5461812019348145, "learning_rate": 8.8592360443608e-07, "loss": 2.2265499114990233, "step": 118400 }, { "epoch": 0.9558212184076911, "grad_norm": 3.755204439163208, "learning_rate": 8.843081346978669e-07, "loss": 2.335698699951172, "step": 118410 }, { "epoch": 0.9559019397334582, "grad_norm": 4.455943584442139, "learning_rate": 8.826926649596537e-07, "loss": 2.9821493148803713, "step": 118420 }, { "epoch": 0.9559826610592252, "grad_norm": 3.683978796005249, "learning_rate": 8.810771952214406e-07, "loss": 2.413543701171875, "step": 118430 }, { "epoch": 0.9560633823849923, "grad_norm": 3.857142210006714, "learning_rate": 8.794617254832274e-07, "loss": 2.1077529907226564, "step": 118440 }, { "epoch": 0.9561441037107593, "grad_norm": 2.775352716445923, "learning_rate": 8.778462557450144e-07, "loss": 2.3491085052490233, "step": 118450 }, { "epoch": 0.9562248250365264, "grad_norm": 3.8397645950317383, "learning_rate": 8.762307860068012e-07, "loss": 2.399957847595215, "step": 118460 }, { "epoch": 0.9563055463622935, "grad_norm": 1.454602837562561, "learning_rate": 8.74615316268588e-07, "loss": 2.3732275009155273, "step": 118470 }, { "epoch": 0.9563862676880605, "grad_norm": 3.040832042694092, "learning_rate": 8.729998465303749e-07, "loss": 2.3192981719970702, "step": 118480 }, { "epoch": 0.9564669890138275, "grad_norm": 5.088533401489258, "learning_rate": 8.713843767921617e-07, "loss": 2.4561227798461913, "step": 118490 }, { "epoch": 0.9565477103395946, "grad_norm": 3.1210556030273438, "learning_rate": 8.697689070539486e-07, "loss": 2.0432527542114256, "step": 118500 }, { "epoch": 0.9566284316653617, "grad_norm": 4.044764041900635, "learning_rate": 8.681534373157356e-07, "loss": 2.5299205780029297, "step": 118510 }, { "epoch": 0.9567091529911287, "grad_norm": 3.4152426719665527, "learning_rate": 8.665379675775225e-07, "loss": 2.3787479400634766, "step": 118520 }, { "epoch": 0.9567898743168958, "grad_norm": 2.795173168182373, "learning_rate": 8.649224978393093e-07, "loss": 2.1938735961914064, "step": 118530 }, { "epoch": 0.9568705956426629, "grad_norm": 3.600869655609131, "learning_rate": 8.633070281010962e-07, "loss": 2.317874717712402, "step": 118540 }, { "epoch": 0.9569513169684299, "grad_norm": 3.757176637649536, "learning_rate": 8.61691558362883e-07, "loss": 2.913935661315918, "step": 118550 }, { "epoch": 0.9570320382941969, "grad_norm": 3.4094631671905518, "learning_rate": 8.6007608862467e-07, "loss": 2.3363225936889647, "step": 118560 }, { "epoch": 0.957112759619964, "grad_norm": 2.9110116958618164, "learning_rate": 8.584606188864568e-07, "loss": 1.993475341796875, "step": 118570 }, { "epoch": 0.9571934809457311, "grad_norm": 3.7619974613189697, "learning_rate": 8.568451491482437e-07, "loss": 2.4495147705078124, "step": 118580 }, { "epoch": 0.9572742022714981, "grad_norm": 4.405552387237549, "learning_rate": 8.552296794100305e-07, "loss": 2.421430969238281, "step": 118590 }, { "epoch": 0.9573549235972652, "grad_norm": 5.0807108879089355, "learning_rate": 8.536142096718174e-07, "loss": 2.5852113723754884, "step": 118600 }, { "epoch": 0.9574356449230322, "grad_norm": 3.484607696533203, "learning_rate": 8.519987399336042e-07, "loss": 1.7447547912597656, "step": 118610 }, { "epoch": 0.9575163662487993, "grad_norm": 3.217242956161499, "learning_rate": 8.503832701953912e-07, "loss": 2.4988101959228515, "step": 118620 }, { "epoch": 0.9575970875745663, "grad_norm": 2.311389923095703, "learning_rate": 8.48767800457178e-07, "loss": 1.8317274093627929, "step": 118630 }, { "epoch": 0.9576778089003334, "grad_norm": 2.49212646484375, "learning_rate": 8.471523307189648e-07, "loss": 2.1618785858154297, "step": 118640 }, { "epoch": 0.9577585302261005, "grad_norm": 3.0866787433624268, "learning_rate": 8.455368609807517e-07, "loss": 2.5053457260131835, "step": 118650 }, { "epoch": 0.9578392515518674, "grad_norm": 4.105429172515869, "learning_rate": 8.439213912425385e-07, "loss": 2.1797565460205077, "step": 118660 }, { "epoch": 0.9579199728776345, "grad_norm": 3.7693636417388916, "learning_rate": 8.423059215043256e-07, "loss": 2.1815704345703124, "step": 118670 }, { "epoch": 0.9580006942034016, "grad_norm": 2.9421937465667725, "learning_rate": 8.406904517661124e-07, "loss": 2.2457740783691404, "step": 118680 }, { "epoch": 0.9580814155291687, "grad_norm": 2.899332046508789, "learning_rate": 8.390749820278993e-07, "loss": 2.3296770095825194, "step": 118690 }, { "epoch": 0.9581621368549357, "grad_norm": 3.1757311820983887, "learning_rate": 8.374595122896861e-07, "loss": 2.6312952041625977, "step": 118700 }, { "epoch": 0.9582428581807028, "grad_norm": 2.911667823791504, "learning_rate": 8.35844042551473e-07, "loss": 1.9091243743896484, "step": 118710 }, { "epoch": 0.9583235795064698, "grad_norm": 3.354069232940674, "learning_rate": 8.342285728132598e-07, "loss": 2.076968193054199, "step": 118720 }, { "epoch": 0.9584043008322368, "grad_norm": 4.318023681640625, "learning_rate": 8.326131030750468e-07, "loss": 1.9550750732421875, "step": 118730 }, { "epoch": 0.9584850221580039, "grad_norm": 4.146530628204346, "learning_rate": 8.309976333368336e-07, "loss": 2.443273735046387, "step": 118740 }, { "epoch": 0.958565743483771, "grad_norm": 2.836428165435791, "learning_rate": 8.293821635986205e-07, "loss": 1.9020044326782226, "step": 118750 }, { "epoch": 0.9586464648095381, "grad_norm": 4.431434154510498, "learning_rate": 8.277666938604073e-07, "loss": 2.516247367858887, "step": 118760 }, { "epoch": 0.958727186135305, "grad_norm": 2.8518104553222656, "learning_rate": 8.261512241221942e-07, "loss": 2.2128473281860352, "step": 118770 }, { "epoch": 0.9588079074610721, "grad_norm": 4.385926246643066, "learning_rate": 8.24535754383981e-07, "loss": 2.63563232421875, "step": 118780 }, { "epoch": 0.9588886287868392, "grad_norm": 3.160985231399536, "learning_rate": 8.229202846457678e-07, "loss": 2.7840585708618164, "step": 118790 }, { "epoch": 0.9589693501126062, "grad_norm": 4.6160969734191895, "learning_rate": 8.213048149075548e-07, "loss": 2.4652509689331055, "step": 118800 }, { "epoch": 0.9590500714383733, "grad_norm": 3.302821397781372, "learning_rate": 8.196893451693416e-07, "loss": 2.3740835189819336, "step": 118810 }, { "epoch": 0.9591307927641404, "grad_norm": 3.036802291870117, "learning_rate": 8.180738754311286e-07, "loss": 2.5995254516601562, "step": 118820 }, { "epoch": 0.9592115140899075, "grad_norm": 4.016059875488281, "learning_rate": 8.164584056929154e-07, "loss": 1.9989051818847656, "step": 118830 }, { "epoch": 0.9592922354156744, "grad_norm": 4.583968162536621, "learning_rate": 8.148429359547024e-07, "loss": 2.394546890258789, "step": 118840 }, { "epoch": 0.9593729567414415, "grad_norm": 4.247672080993652, "learning_rate": 8.132274662164892e-07, "loss": 2.5145729064941404, "step": 118850 }, { "epoch": 0.9594536780672086, "grad_norm": 2.4265596866607666, "learning_rate": 8.116119964782761e-07, "loss": 2.5556880950927736, "step": 118860 }, { "epoch": 0.9595343993929756, "grad_norm": 4.196887016296387, "learning_rate": 8.099965267400629e-07, "loss": 2.181220817565918, "step": 118870 }, { "epoch": 0.9596151207187427, "grad_norm": 4.033860206604004, "learning_rate": 8.083810570018498e-07, "loss": 2.3347522735595705, "step": 118880 }, { "epoch": 0.9596958420445098, "grad_norm": 2.9758431911468506, "learning_rate": 8.067655872636366e-07, "loss": 2.391083908081055, "step": 118890 }, { "epoch": 0.9597765633702768, "grad_norm": 3.390108108520508, "learning_rate": 8.051501175254236e-07, "loss": 2.1830352783203124, "step": 118900 }, { "epoch": 0.9598572846960438, "grad_norm": 2.4160397052764893, "learning_rate": 8.035346477872104e-07, "loss": 2.349507713317871, "step": 118910 }, { "epoch": 0.9599380060218109, "grad_norm": 2.6835668087005615, "learning_rate": 8.019191780489973e-07, "loss": 2.5312013626098633, "step": 118920 }, { "epoch": 0.960018727347578, "grad_norm": 2.055586338043213, "learning_rate": 8.003037083107841e-07, "loss": 2.1650123596191406, "step": 118930 }, { "epoch": 0.960099448673345, "grad_norm": 3.4335429668426514, "learning_rate": 7.98688238572571e-07, "loss": 2.458338165283203, "step": 118940 }, { "epoch": 0.960180169999112, "grad_norm": 2.556145191192627, "learning_rate": 7.970727688343578e-07, "loss": 2.0011018753051757, "step": 118950 }, { "epoch": 0.9602608913248791, "grad_norm": 2.56569242477417, "learning_rate": 7.954572990961446e-07, "loss": 2.127098274230957, "step": 118960 }, { "epoch": 0.9603416126506462, "grad_norm": 5.499950885772705, "learning_rate": 7.938418293579316e-07, "loss": 2.534249114990234, "step": 118970 }, { "epoch": 0.9604223339764132, "grad_norm": 3.8887555599212646, "learning_rate": 7.922263596197185e-07, "loss": 2.4093677520751955, "step": 118980 }, { "epoch": 0.9605030553021803, "grad_norm": 2.7825121879577637, "learning_rate": 7.906108898815054e-07, "loss": 2.8184219360351563, "step": 118990 }, { "epoch": 0.9605837766279474, "grad_norm": 3.263387441635132, "learning_rate": 7.889954201432922e-07, "loss": 2.2797592163085936, "step": 119000 }, { "epoch": 0.9606644979537144, "grad_norm": 4.24951171875, "learning_rate": 7.873799504050792e-07, "loss": 2.2788047790527344, "step": 119010 }, { "epoch": 0.9607452192794814, "grad_norm": 3.8776350021362305, "learning_rate": 7.85764480666866e-07, "loss": 2.3549272537231447, "step": 119020 }, { "epoch": 0.9608259406052485, "grad_norm": 2.6345741748809814, "learning_rate": 7.841490109286529e-07, "loss": 2.407227325439453, "step": 119030 }, { "epoch": 0.9609066619310156, "grad_norm": 3.7808492183685303, "learning_rate": 7.825335411904397e-07, "loss": 2.575173568725586, "step": 119040 }, { "epoch": 0.9609873832567826, "grad_norm": 1.9463872909545898, "learning_rate": 7.809180714522266e-07, "loss": 1.9527379989624023, "step": 119050 }, { "epoch": 0.9610681045825497, "grad_norm": 4.681628704071045, "learning_rate": 7.793026017140134e-07, "loss": 2.2246124267578127, "step": 119060 }, { "epoch": 0.9611488259083167, "grad_norm": 2.4786014556884766, "learning_rate": 7.776871319758003e-07, "loss": 2.0290395736694338, "step": 119070 }, { "epoch": 0.9612295472340838, "grad_norm": 3.1546432971954346, "learning_rate": 7.760716622375872e-07, "loss": 2.1549875259399416, "step": 119080 }, { "epoch": 0.9613102685598508, "grad_norm": 3.3178532123565674, "learning_rate": 7.744561924993741e-07, "loss": 1.9138656616210938, "step": 119090 }, { "epoch": 0.9613909898856179, "grad_norm": 3.477583646774292, "learning_rate": 7.728407227611609e-07, "loss": 2.230917549133301, "step": 119100 }, { "epoch": 0.961471711211385, "grad_norm": 3.87432861328125, "learning_rate": 7.712252530229478e-07, "loss": 2.467085075378418, "step": 119110 }, { "epoch": 0.961552432537152, "grad_norm": 4.420289039611816, "learning_rate": 7.696097832847346e-07, "loss": 2.2549386978149415, "step": 119120 }, { "epoch": 0.961633153862919, "grad_norm": 3.631338357925415, "learning_rate": 7.679943135465214e-07, "loss": 2.246504783630371, "step": 119130 }, { "epoch": 0.9617138751886861, "grad_norm": 3.0960867404937744, "learning_rate": 7.663788438083085e-07, "loss": 1.9099552154541015, "step": 119140 }, { "epoch": 0.9617945965144532, "grad_norm": 4.621560573577881, "learning_rate": 7.647633740700953e-07, "loss": 2.016534996032715, "step": 119150 }, { "epoch": 0.9618753178402202, "grad_norm": 3.3931527137756348, "learning_rate": 7.631479043318822e-07, "loss": 2.482322883605957, "step": 119160 }, { "epoch": 0.9619560391659873, "grad_norm": 3.893068790435791, "learning_rate": 7.61532434593669e-07, "loss": 2.9623159408569335, "step": 119170 }, { "epoch": 0.9620367604917544, "grad_norm": 2.929635763168335, "learning_rate": 7.599169648554559e-07, "loss": 2.4286359786987304, "step": 119180 }, { "epoch": 0.9621174818175213, "grad_norm": 3.164231538772583, "learning_rate": 7.583014951172428e-07, "loss": 2.181136131286621, "step": 119190 }, { "epoch": 0.9621982031432884, "grad_norm": 6.3549418449401855, "learning_rate": 7.566860253790297e-07, "loss": 2.2314174652099608, "step": 119200 }, { "epoch": 0.9622789244690555, "grad_norm": 5.292317867279053, "learning_rate": 7.550705556408165e-07, "loss": 2.1303234100341797, "step": 119210 }, { "epoch": 0.9623596457948226, "grad_norm": 2.7315430641174316, "learning_rate": 7.534550859026034e-07, "loss": 2.0671491622924805, "step": 119220 }, { "epoch": 0.9624403671205896, "grad_norm": 3.9711992740631104, "learning_rate": 7.518396161643902e-07, "loss": 2.859700012207031, "step": 119230 }, { "epoch": 0.9625210884463566, "grad_norm": 2.6971073150634766, "learning_rate": 7.502241464261771e-07, "loss": 2.0593708038330076, "step": 119240 }, { "epoch": 0.9626018097721237, "grad_norm": 2.9299750328063965, "learning_rate": 7.48608676687964e-07, "loss": 2.9937101364135743, "step": 119250 }, { "epoch": 0.9626825310978907, "grad_norm": 6.466066837310791, "learning_rate": 7.469932069497509e-07, "loss": 2.348917007446289, "step": 119260 }, { "epoch": 0.9627632524236578, "grad_norm": 2.1808111667633057, "learning_rate": 7.453777372115377e-07, "loss": 2.38765811920166, "step": 119270 }, { "epoch": 0.9628439737494249, "grad_norm": 5.51572322845459, "learning_rate": 7.437622674733246e-07, "loss": 2.729570770263672, "step": 119280 }, { "epoch": 0.962924695075192, "grad_norm": 1.9189767837524414, "learning_rate": 7.421467977351115e-07, "loss": 2.0372854232788087, "step": 119290 }, { "epoch": 0.9630054164009589, "grad_norm": 3.941692352294922, "learning_rate": 7.405313279968985e-07, "loss": 2.6180667877197266, "step": 119300 }, { "epoch": 0.963086137726726, "grad_norm": 4.543814182281494, "learning_rate": 7.389158582586853e-07, "loss": 2.3949312210083007, "step": 119310 }, { "epoch": 0.9631668590524931, "grad_norm": 4.278378009796143, "learning_rate": 7.373003885204721e-07, "loss": 2.858320617675781, "step": 119320 }, { "epoch": 0.9632475803782601, "grad_norm": 4.1667633056640625, "learning_rate": 7.35684918782259e-07, "loss": 2.5917030334472657, "step": 119330 }, { "epoch": 0.9633283017040272, "grad_norm": 3.5899763107299805, "learning_rate": 7.340694490440458e-07, "loss": 2.6773128509521484, "step": 119340 }, { "epoch": 0.9634090230297943, "grad_norm": 3.6743922233581543, "learning_rate": 7.324539793058327e-07, "loss": 2.5723833084106444, "step": 119350 }, { "epoch": 0.9634897443555613, "grad_norm": 3.511690616607666, "learning_rate": 7.308385095676196e-07, "loss": 2.3386533737182615, "step": 119360 }, { "epoch": 0.9635704656813283, "grad_norm": 3.2467126846313477, "learning_rate": 7.292230398294065e-07, "loss": 2.485047721862793, "step": 119370 }, { "epoch": 0.9636511870070954, "grad_norm": 3.30120587348938, "learning_rate": 7.276075700911933e-07, "loss": 2.4628665924072264, "step": 119380 }, { "epoch": 0.9637319083328625, "grad_norm": 3.829916477203369, "learning_rate": 7.259921003529802e-07, "loss": 2.4557586669921876, "step": 119390 }, { "epoch": 0.9638126296586295, "grad_norm": 4.887533664703369, "learning_rate": 7.24376630614767e-07, "loss": 2.3470516204833984, "step": 119400 }, { "epoch": 0.9638933509843965, "grad_norm": 5.934352874755859, "learning_rate": 7.227611608765539e-07, "loss": 2.0722225189208983, "step": 119410 }, { "epoch": 0.9639740723101636, "grad_norm": 3.598480463027954, "learning_rate": 7.211456911383407e-07, "loss": 2.0222446441650392, "step": 119420 }, { "epoch": 0.9640547936359307, "grad_norm": 2.359069585800171, "learning_rate": 7.195302214001277e-07, "loss": 2.905369758605957, "step": 119430 }, { "epoch": 0.9641355149616977, "grad_norm": 2.181431531906128, "learning_rate": 7.179147516619145e-07, "loss": 1.9287813186645508, "step": 119440 }, { "epoch": 0.9642162362874648, "grad_norm": 3.1789019107818604, "learning_rate": 7.162992819237015e-07, "loss": 2.2848094940185546, "step": 119450 }, { "epoch": 0.9642969576132319, "grad_norm": 3.225074291229248, "learning_rate": 7.146838121854883e-07, "loss": 2.073348808288574, "step": 119460 }, { "epoch": 0.9643776789389988, "grad_norm": 1.792428970336914, "learning_rate": 7.130683424472753e-07, "loss": 2.383633041381836, "step": 119470 }, { "epoch": 0.9644584002647659, "grad_norm": 2.89508318901062, "learning_rate": 7.114528727090621e-07, "loss": 2.169403648376465, "step": 119480 }, { "epoch": 0.964539121590533, "grad_norm": 4.617619514465332, "learning_rate": 7.09837402970849e-07, "loss": 2.079942321777344, "step": 119490 }, { "epoch": 0.9646198429163001, "grad_norm": 2.3889968395233154, "learning_rate": 7.082219332326358e-07, "loss": 2.4115447998046875, "step": 119500 }, { "epoch": 0.9647005642420671, "grad_norm": 4.169885158538818, "learning_rate": 7.066064634944226e-07, "loss": 2.1398744583129883, "step": 119510 }, { "epoch": 0.9647812855678342, "grad_norm": 3.911227226257324, "learning_rate": 7.049909937562095e-07, "loss": 2.661310005187988, "step": 119520 }, { "epoch": 0.9648620068936012, "grad_norm": 3.922020435333252, "learning_rate": 7.033755240179964e-07, "loss": 1.8849138259887694, "step": 119530 }, { "epoch": 0.9649427282193683, "grad_norm": 3.023542881011963, "learning_rate": 7.017600542797833e-07, "loss": 2.201156425476074, "step": 119540 }, { "epoch": 0.9650234495451353, "grad_norm": 4.0259294509887695, "learning_rate": 7.001445845415701e-07, "loss": 2.3499345779418945, "step": 119550 }, { "epoch": 0.9651041708709024, "grad_norm": 3.039947032928467, "learning_rate": 6.98529114803357e-07, "loss": 1.9250513076782227, "step": 119560 }, { "epoch": 0.9651848921966695, "grad_norm": 1.844728708267212, "learning_rate": 6.969136450651438e-07, "loss": 2.1485132217407226, "step": 119570 }, { "epoch": 0.9652656135224364, "grad_norm": 4.685358047485352, "learning_rate": 6.952981753269307e-07, "loss": 2.4423297882080077, "step": 119580 }, { "epoch": 0.9653463348482035, "grad_norm": 3.0158486366271973, "learning_rate": 6.936827055887175e-07, "loss": 2.369398498535156, "step": 119590 }, { "epoch": 0.9654270561739706, "grad_norm": 2.80972957611084, "learning_rate": 6.920672358505045e-07, "loss": 2.6276655197143555, "step": 119600 }, { "epoch": 0.9655077774997377, "grad_norm": 4.5148844718933105, "learning_rate": 6.904517661122914e-07, "loss": 2.3846014022827147, "step": 119610 }, { "epoch": 0.9655884988255047, "grad_norm": 3.6959228515625, "learning_rate": 6.888362963740783e-07, "loss": 2.0788618087768556, "step": 119620 }, { "epoch": 0.9656692201512718, "grad_norm": 3.6346750259399414, "learning_rate": 6.872208266358651e-07, "loss": 2.74167423248291, "step": 119630 }, { "epoch": 0.9657499414770389, "grad_norm": 4.292566776275635, "learning_rate": 6.856053568976521e-07, "loss": 2.508354377746582, "step": 119640 }, { "epoch": 0.9658306628028058, "grad_norm": 4.2549662590026855, "learning_rate": 6.839898871594389e-07, "loss": 2.2200382232666014, "step": 119650 }, { "epoch": 0.9659113841285729, "grad_norm": 4.602463722229004, "learning_rate": 6.823744174212257e-07, "loss": 2.179268455505371, "step": 119660 }, { "epoch": 0.96599210545434, "grad_norm": 2.515308141708374, "learning_rate": 6.807589476830126e-07, "loss": 1.9468149185180663, "step": 119670 }, { "epoch": 0.9660728267801071, "grad_norm": 2.6198999881744385, "learning_rate": 6.791434779447994e-07, "loss": 2.6852840423583983, "step": 119680 }, { "epoch": 0.9661535481058741, "grad_norm": 4.0296406745910645, "learning_rate": 6.775280082065863e-07, "loss": 2.6496042251586913, "step": 119690 }, { "epoch": 0.9662342694316411, "grad_norm": 2.840761661529541, "learning_rate": 6.759125384683731e-07, "loss": 1.596292209625244, "step": 119700 }, { "epoch": 0.9663149907574082, "grad_norm": 5.207286357879639, "learning_rate": 6.742970687301601e-07, "loss": 2.3895553588867187, "step": 119710 }, { "epoch": 0.9663957120831752, "grad_norm": 3.153287410736084, "learning_rate": 6.726815989919469e-07, "loss": 2.590606689453125, "step": 119720 }, { "epoch": 0.9664764334089423, "grad_norm": 3.263270854949951, "learning_rate": 6.710661292537338e-07, "loss": 2.227199745178223, "step": 119730 }, { "epoch": 0.9665571547347094, "grad_norm": 4.011066913604736, "learning_rate": 6.694506595155206e-07, "loss": 2.2427791595458983, "step": 119740 }, { "epoch": 0.9666378760604765, "grad_norm": 4.138692855834961, "learning_rate": 6.678351897773075e-07, "loss": 2.1696283340454103, "step": 119750 }, { "epoch": 0.9667185973862434, "grad_norm": 3.3684308528900146, "learning_rate": 6.662197200390943e-07, "loss": 2.6985183715820313, "step": 119760 }, { "epoch": 0.9667993187120105, "grad_norm": 5.852397441864014, "learning_rate": 6.646042503008814e-07, "loss": 2.5677410125732423, "step": 119770 }, { "epoch": 0.9668800400377776, "grad_norm": 3.889694929122925, "learning_rate": 6.629887805626682e-07, "loss": 1.6697261810302735, "step": 119780 }, { "epoch": 0.9669607613635446, "grad_norm": 3.016441822052002, "learning_rate": 6.613733108244551e-07, "loss": 2.293812942504883, "step": 119790 }, { "epoch": 0.9670414826893117, "grad_norm": 4.897902965545654, "learning_rate": 6.597578410862419e-07, "loss": 2.05360107421875, "step": 119800 }, { "epoch": 0.9671222040150788, "grad_norm": 2.943492889404297, "learning_rate": 6.581423713480288e-07, "loss": 2.2897525787353517, "step": 119810 }, { "epoch": 0.9672029253408458, "grad_norm": 2.950939893722534, "learning_rate": 6.565269016098157e-07, "loss": 2.299024963378906, "step": 119820 }, { "epoch": 0.9672836466666128, "grad_norm": 2.7500932216644287, "learning_rate": 6.549114318716025e-07, "loss": 2.449335479736328, "step": 119830 }, { "epoch": 0.9673643679923799, "grad_norm": 3.6228997707366943, "learning_rate": 6.532959621333894e-07, "loss": 2.1908475875854494, "step": 119840 }, { "epoch": 0.967445089318147, "grad_norm": 1.404990553855896, "learning_rate": 6.516804923951762e-07, "loss": 2.1451467514038085, "step": 119850 }, { "epoch": 0.967525810643914, "grad_norm": 2.7366833686828613, "learning_rate": 6.500650226569631e-07, "loss": 2.207818603515625, "step": 119860 }, { "epoch": 0.967606531969681, "grad_norm": 4.51303243637085, "learning_rate": 6.484495529187499e-07, "loss": 2.4627513885498047, "step": 119870 }, { "epoch": 0.9676872532954481, "grad_norm": 4.6142802238464355, "learning_rate": 6.468340831805369e-07, "loss": 2.199230194091797, "step": 119880 }, { "epoch": 0.9677679746212152, "grad_norm": 5.141334533691406, "learning_rate": 6.452186134423237e-07, "loss": 2.847715950012207, "step": 119890 }, { "epoch": 0.9678486959469822, "grad_norm": 2.616626262664795, "learning_rate": 6.436031437041106e-07, "loss": 1.8677648544311523, "step": 119900 }, { "epoch": 0.9679294172727493, "grad_norm": 7.39004373550415, "learning_rate": 6.419876739658974e-07, "loss": 2.275567626953125, "step": 119910 }, { "epoch": 0.9680101385985164, "grad_norm": 3.0614099502563477, "learning_rate": 6.403722042276844e-07, "loss": 2.0375091552734377, "step": 119920 }, { "epoch": 0.9680908599242833, "grad_norm": 4.953085899353027, "learning_rate": 6.387567344894713e-07, "loss": 2.146928596496582, "step": 119930 }, { "epoch": 0.9681715812500504, "grad_norm": 5.079051971435547, "learning_rate": 6.371412647512582e-07, "loss": 2.5159969329833984, "step": 119940 }, { "epoch": 0.9682523025758175, "grad_norm": 6.108376979827881, "learning_rate": 6.35525795013045e-07, "loss": 2.741256332397461, "step": 119950 }, { "epoch": 0.9683330239015846, "grad_norm": 4.537164211273193, "learning_rate": 6.339103252748319e-07, "loss": 2.3918596267700196, "step": 119960 }, { "epoch": 0.9684137452273516, "grad_norm": 5.39730167388916, "learning_rate": 6.322948555366187e-07, "loss": 2.2325639724731445, "step": 119970 }, { "epoch": 0.9684944665531187, "grad_norm": 3.6272945404052734, "learning_rate": 6.306793857984056e-07, "loss": 2.693252944946289, "step": 119980 }, { "epoch": 0.9685751878788857, "grad_norm": 2.887342929840088, "learning_rate": 6.290639160601925e-07, "loss": 2.6618635177612306, "step": 119990 }, { "epoch": 0.9686559092046528, "grad_norm": 4.747265815734863, "learning_rate": 6.274484463219793e-07, "loss": 2.406744194030762, "step": 120000 }, { "epoch": 0.9687366305304198, "grad_norm": 2.0102500915527344, "learning_rate": 6.258329765837662e-07, "loss": 2.014687156677246, "step": 120010 }, { "epoch": 0.9688173518561869, "grad_norm": 2.6870217323303223, "learning_rate": 6.24217506845553e-07, "loss": 2.1413673400878905, "step": 120020 }, { "epoch": 0.968898073181954, "grad_norm": 3.712923049926758, "learning_rate": 6.226020371073399e-07, "loss": 2.312137985229492, "step": 120030 }, { "epoch": 0.968978794507721, "grad_norm": 3.269458293914795, "learning_rate": 6.209865673691268e-07, "loss": 2.1534954071044923, "step": 120040 }, { "epoch": 0.969059515833488, "grad_norm": 4.599028587341309, "learning_rate": 6.193710976309137e-07, "loss": 2.7199769973754884, "step": 120050 }, { "epoch": 0.9691402371592551, "grad_norm": 4.120713710784912, "learning_rate": 6.177556278927005e-07, "loss": 2.745742416381836, "step": 120060 }, { "epoch": 0.9692209584850222, "grad_norm": 4.041823387145996, "learning_rate": 6.161401581544874e-07, "loss": 2.4229326248168945, "step": 120070 }, { "epoch": 0.9693016798107892, "grad_norm": 3.317241907119751, "learning_rate": 6.145246884162743e-07, "loss": 2.3019359588623045, "step": 120080 }, { "epoch": 0.9693824011365563, "grad_norm": 2.8215065002441406, "learning_rate": 6.129092186780612e-07, "loss": 2.213024139404297, "step": 120090 }, { "epoch": 0.9694631224623234, "grad_norm": 4.578315734863281, "learning_rate": 6.11293748939848e-07, "loss": 2.435260009765625, "step": 120100 }, { "epoch": 0.9695438437880903, "grad_norm": 3.6161768436431885, "learning_rate": 6.096782792016349e-07, "loss": 2.225981330871582, "step": 120110 }, { "epoch": 0.9696245651138574, "grad_norm": 3.1967456340789795, "learning_rate": 6.080628094634218e-07, "loss": 2.514849853515625, "step": 120120 }, { "epoch": 0.9697052864396245, "grad_norm": 2.5313711166381836, "learning_rate": 6.064473397252087e-07, "loss": 2.332619285583496, "step": 120130 }, { "epoch": 0.9697860077653916, "grad_norm": 1.9634543657302856, "learning_rate": 6.048318699869955e-07, "loss": 2.2742435455322267, "step": 120140 }, { "epoch": 0.9698667290911586, "grad_norm": 3.6455070972442627, "learning_rate": 6.032164002487824e-07, "loss": 2.045871925354004, "step": 120150 }, { "epoch": 0.9699474504169256, "grad_norm": 5.018775463104248, "learning_rate": 6.016009305105693e-07, "loss": 2.720658874511719, "step": 120160 }, { "epoch": 0.9700281717426927, "grad_norm": 2.982027053833008, "learning_rate": 5.999854607723562e-07, "loss": 2.5913137435913085, "step": 120170 }, { "epoch": 0.9701088930684597, "grad_norm": 2.5008437633514404, "learning_rate": 5.98369991034143e-07, "loss": 2.3965002059936524, "step": 120180 }, { "epoch": 0.9701896143942268, "grad_norm": 4.94004487991333, "learning_rate": 5.967545212959298e-07, "loss": 2.6103769302368165, "step": 120190 }, { "epoch": 0.9702703357199939, "grad_norm": 2.7185280323028564, "learning_rate": 5.951390515577168e-07, "loss": 2.5338891983032226, "step": 120200 }, { "epoch": 0.970351057045761, "grad_norm": 1.6537163257598877, "learning_rate": 5.935235818195036e-07, "loss": 2.158978271484375, "step": 120210 }, { "epoch": 0.9704317783715279, "grad_norm": 3.36665415763855, "learning_rate": 5.919081120812905e-07, "loss": 1.7859067916870117, "step": 120220 }, { "epoch": 0.970512499697295, "grad_norm": 11.181600570678711, "learning_rate": 5.902926423430773e-07, "loss": 2.1744178771972655, "step": 120230 }, { "epoch": 0.9705932210230621, "grad_norm": 5.885454177856445, "learning_rate": 5.886771726048642e-07, "loss": 2.367881011962891, "step": 120240 }, { "epoch": 0.9706739423488291, "grad_norm": 5.465035438537598, "learning_rate": 5.870617028666511e-07, "loss": 2.4955501556396484, "step": 120250 }, { "epoch": 0.9707546636745962, "grad_norm": 3.9735028743743896, "learning_rate": 5.85446233128438e-07, "loss": 2.3297279357910154, "step": 120260 }, { "epoch": 0.9708353850003633, "grad_norm": 2.683915376663208, "learning_rate": 5.838307633902249e-07, "loss": 2.1752492904663088, "step": 120270 }, { "epoch": 0.9709161063261303, "grad_norm": 6.785614967346191, "learning_rate": 5.822152936520118e-07, "loss": 3.034745216369629, "step": 120280 }, { "epoch": 0.9709968276518973, "grad_norm": 5.6234235763549805, "learning_rate": 5.805998239137986e-07, "loss": 2.4374130249023436, "step": 120290 }, { "epoch": 0.9710775489776644, "grad_norm": 4.344409942626953, "learning_rate": 5.789843541755855e-07, "loss": 2.1344606399536135, "step": 120300 }, { "epoch": 0.9711582703034315, "grad_norm": 4.536934852600098, "learning_rate": 5.773688844373723e-07, "loss": 2.255900192260742, "step": 120310 }, { "epoch": 0.9712389916291985, "grad_norm": 3.988417625427246, "learning_rate": 5.757534146991592e-07, "loss": 2.323098564147949, "step": 120320 }, { "epoch": 0.9713197129549656, "grad_norm": 3.7691335678100586, "learning_rate": 5.74137944960946e-07, "loss": 2.022271156311035, "step": 120330 }, { "epoch": 0.9714004342807326, "grad_norm": 4.048851490020752, "learning_rate": 5.72522475222733e-07, "loss": 2.4111307144165037, "step": 120340 }, { "epoch": 0.9714811556064997, "grad_norm": 3.111842632293701, "learning_rate": 5.709070054845198e-07, "loss": 2.142625427246094, "step": 120350 }, { "epoch": 0.9715618769322667, "grad_norm": 5.33961820602417, "learning_rate": 5.692915357463067e-07, "loss": 2.672563171386719, "step": 120360 }, { "epoch": 0.9716425982580338, "grad_norm": 4.216978549957275, "learning_rate": 5.676760660080936e-07, "loss": 2.7753503799438475, "step": 120370 }, { "epoch": 0.9717233195838009, "grad_norm": 3.925739288330078, "learning_rate": 5.660605962698804e-07, "loss": 2.1966552734375, "step": 120380 }, { "epoch": 0.9718040409095678, "grad_norm": 2.9916181564331055, "learning_rate": 5.644451265316673e-07, "loss": 2.3540777206420898, "step": 120390 }, { "epoch": 0.9718847622353349, "grad_norm": 3.9729669094085693, "learning_rate": 5.628296567934541e-07, "loss": 2.751914405822754, "step": 120400 }, { "epoch": 0.971965483561102, "grad_norm": 3.9091765880584717, "learning_rate": 5.61214187055241e-07, "loss": 2.385106658935547, "step": 120410 }, { "epoch": 0.9720462048868691, "grad_norm": 2.2437267303466797, "learning_rate": 5.595987173170279e-07, "loss": 2.6169496536254884, "step": 120420 }, { "epoch": 0.9721269262126361, "grad_norm": 3.099424362182617, "learning_rate": 5.579832475788148e-07, "loss": 2.745879364013672, "step": 120430 }, { "epoch": 0.9722076475384032, "grad_norm": 3.0860071182250977, "learning_rate": 5.563677778406016e-07, "loss": 2.5343864440917967, "step": 120440 }, { "epoch": 0.9722883688641702, "grad_norm": 4.82537841796875, "learning_rate": 5.547523081023886e-07, "loss": 2.352389335632324, "step": 120450 }, { "epoch": 0.9723690901899372, "grad_norm": 3.3741567134857178, "learning_rate": 5.531368383641754e-07, "loss": 2.3482776641845704, "step": 120460 }, { "epoch": 0.9724498115157043, "grad_norm": 3.425915241241455, "learning_rate": 5.515213686259623e-07, "loss": 2.064739799499512, "step": 120470 }, { "epoch": 0.9725305328414714, "grad_norm": 4.4257917404174805, "learning_rate": 5.499058988877491e-07, "loss": 2.418692970275879, "step": 120480 }, { "epoch": 0.9726112541672385, "grad_norm": 3.6099483966827393, "learning_rate": 5.48290429149536e-07, "loss": 2.2350812911987306, "step": 120490 }, { "epoch": 0.9726919754930055, "grad_norm": 3.220604181289673, "learning_rate": 5.466749594113228e-07, "loss": 2.287248229980469, "step": 120500 }, { "epoch": 0.9727726968187725, "grad_norm": 3.5486884117126465, "learning_rate": 5.450594896731098e-07, "loss": 2.344277000427246, "step": 120510 }, { "epoch": 0.9728534181445396, "grad_norm": 4.754910469055176, "learning_rate": 5.434440199348966e-07, "loss": 2.214460754394531, "step": 120520 }, { "epoch": 0.9729341394703067, "grad_norm": 3.5386927127838135, "learning_rate": 5.418285501966835e-07, "loss": 2.7603839874267577, "step": 120530 }, { "epoch": 0.9730148607960737, "grad_norm": 3.058300733566284, "learning_rate": 5.402130804584704e-07, "loss": 2.4013906478881837, "step": 120540 }, { "epoch": 0.9730955821218408, "grad_norm": 4.097300052642822, "learning_rate": 5.385976107202572e-07, "loss": 2.756930351257324, "step": 120550 }, { "epoch": 0.9731763034476079, "grad_norm": 3.6218252182006836, "learning_rate": 5.369821409820441e-07, "loss": 2.3618316650390625, "step": 120560 }, { "epoch": 0.9732570247733748, "grad_norm": 5.42995548248291, "learning_rate": 5.353666712438309e-07, "loss": 1.9595087051391602, "step": 120570 }, { "epoch": 0.9733377460991419, "grad_norm": 3.9314420223236084, "learning_rate": 5.337512015056178e-07, "loss": 1.8376903533935547, "step": 120580 }, { "epoch": 0.973418467424909, "grad_norm": 3.126405715942383, "learning_rate": 5.321357317674047e-07, "loss": 2.011294364929199, "step": 120590 }, { "epoch": 0.9734991887506761, "grad_norm": 2.185309410095215, "learning_rate": 5.305202620291916e-07, "loss": 2.554090881347656, "step": 120600 }, { "epoch": 0.9735799100764431, "grad_norm": 2.908205032348633, "learning_rate": 5.289047922909784e-07, "loss": 2.1322998046875, "step": 120610 }, { "epoch": 0.9736606314022102, "grad_norm": 2.690234422683716, "learning_rate": 5.272893225527654e-07, "loss": 1.57849063873291, "step": 120620 }, { "epoch": 0.9737413527279772, "grad_norm": 2.9124324321746826, "learning_rate": 5.256738528145522e-07, "loss": 2.582087516784668, "step": 120630 }, { "epoch": 0.9738220740537442, "grad_norm": 4.585997581481934, "learning_rate": 5.240583830763391e-07, "loss": 1.7011579513549804, "step": 120640 }, { "epoch": 0.9739027953795113, "grad_norm": 2.6334009170532227, "learning_rate": 5.224429133381259e-07, "loss": 2.168234634399414, "step": 120650 }, { "epoch": 0.9739835167052784, "grad_norm": 3.807698965072632, "learning_rate": 5.208274435999128e-07, "loss": 2.760447120666504, "step": 120660 }, { "epoch": 0.9740642380310455, "grad_norm": 3.919046401977539, "learning_rate": 5.192119738616997e-07, "loss": 2.2125646591186525, "step": 120670 }, { "epoch": 0.9741449593568124, "grad_norm": 5.570766448974609, "learning_rate": 5.175965041234866e-07, "loss": 2.1523887634277346, "step": 120680 }, { "epoch": 0.9742256806825795, "grad_norm": 3.144906759262085, "learning_rate": 5.159810343852734e-07, "loss": 2.1778656005859376, "step": 120690 }, { "epoch": 0.9743064020083466, "grad_norm": 2.4306068420410156, "learning_rate": 5.143655646470602e-07, "loss": 2.8034650802612306, "step": 120700 }, { "epoch": 0.9743871233341136, "grad_norm": 2.2737231254577637, "learning_rate": 5.127500949088472e-07, "loss": 2.4833194732666017, "step": 120710 }, { "epoch": 0.9744678446598807, "grad_norm": 3.057889223098755, "learning_rate": 5.11134625170634e-07, "loss": 1.874854850769043, "step": 120720 }, { "epoch": 0.9745485659856478, "grad_norm": 2.446898937225342, "learning_rate": 5.095191554324209e-07, "loss": 2.7023754119873047, "step": 120730 }, { "epoch": 0.9746292873114148, "grad_norm": 3.8410840034484863, "learning_rate": 5.079036856942077e-07, "loss": 2.4233625411987303, "step": 120740 }, { "epoch": 0.9747100086371818, "grad_norm": 5.084534168243408, "learning_rate": 5.062882159559947e-07, "loss": 2.4189731597900392, "step": 120750 }, { "epoch": 0.9747907299629489, "grad_norm": 2.1554744243621826, "learning_rate": 5.046727462177815e-07, "loss": 2.1071989059448244, "step": 120760 }, { "epoch": 0.974871451288716, "grad_norm": 3.028233528137207, "learning_rate": 5.030572764795684e-07, "loss": 2.3000114440917967, "step": 120770 }, { "epoch": 0.974952172614483, "grad_norm": 4.8806352615356445, "learning_rate": 5.014418067413552e-07, "loss": 2.4717483520507812, "step": 120780 }, { "epoch": 0.9750328939402501, "grad_norm": 4.797660827636719, "learning_rate": 4.998263370031422e-07, "loss": 2.378322792053223, "step": 120790 }, { "epoch": 0.9751136152660171, "grad_norm": 3.931152582168579, "learning_rate": 4.98210867264929e-07, "loss": 2.158732795715332, "step": 120800 }, { "epoch": 0.9751943365917842, "grad_norm": 2.844113826751709, "learning_rate": 4.965953975267159e-07, "loss": 2.48612060546875, "step": 120810 }, { "epoch": 0.9752750579175512, "grad_norm": 3.806225061416626, "learning_rate": 4.949799277885028e-07, "loss": 2.5457969665527345, "step": 120820 }, { "epoch": 0.9753557792433183, "grad_norm": 3.1627252101898193, "learning_rate": 4.933644580502897e-07, "loss": 1.9520931243896484, "step": 120830 }, { "epoch": 0.9754365005690854, "grad_norm": 3.4881279468536377, "learning_rate": 4.917489883120765e-07, "loss": 2.174613189697266, "step": 120840 }, { "epoch": 0.9755172218948523, "grad_norm": 2.8702096939086914, "learning_rate": 4.901335185738634e-07, "loss": 3.1487777709960936, "step": 120850 }, { "epoch": 0.9755979432206194, "grad_norm": 4.502722263336182, "learning_rate": 4.885180488356502e-07, "loss": 2.35491886138916, "step": 120860 }, { "epoch": 0.9756786645463865, "grad_norm": 3.2950692176818848, "learning_rate": 4.86902579097437e-07, "loss": 2.3763208389282227, "step": 120870 }, { "epoch": 0.9757593858721536, "grad_norm": 4.817157745361328, "learning_rate": 4.85287109359224e-07, "loss": 2.531002235412598, "step": 120880 }, { "epoch": 0.9758401071979206, "grad_norm": 5.179990768432617, "learning_rate": 4.836716396210108e-07, "loss": 2.270485687255859, "step": 120890 }, { "epoch": 0.9759208285236877, "grad_norm": 4.7590179443359375, "learning_rate": 4.820561698827978e-07, "loss": 2.4476417541503905, "step": 120900 }, { "epoch": 0.9760015498494548, "grad_norm": 2.2954823970794678, "learning_rate": 4.804407001445846e-07, "loss": 2.2916893005371093, "step": 120910 }, { "epoch": 0.9760822711752217, "grad_norm": 2.6400082111358643, "learning_rate": 4.788252304063715e-07, "loss": 1.8307294845581055, "step": 120920 }, { "epoch": 0.9761629925009888, "grad_norm": 5.108478546142578, "learning_rate": 4.772097606681583e-07, "loss": 2.6732484817504885, "step": 120930 }, { "epoch": 0.9762437138267559, "grad_norm": 4.552839279174805, "learning_rate": 4.755942909299452e-07, "loss": 2.28102970123291, "step": 120940 }, { "epoch": 0.976324435152523, "grad_norm": 3.817018747329712, "learning_rate": 4.7397882119173207e-07, "loss": 2.686573791503906, "step": 120950 }, { "epoch": 0.97640515647829, "grad_norm": 5.717988014221191, "learning_rate": 4.723633514535189e-07, "loss": 2.6426101684570313, "step": 120960 }, { "epoch": 0.976485877804057, "grad_norm": 3.113872528076172, "learning_rate": 4.7074788171530576e-07, "loss": 2.4488012313842775, "step": 120970 }, { "epoch": 0.9765665991298241, "grad_norm": 4.509495258331299, "learning_rate": 4.691324119770927e-07, "loss": 2.5449960708618162, "step": 120980 }, { "epoch": 0.9766473204555912, "grad_norm": 1.2548997402191162, "learning_rate": 4.6751694223887956e-07, "loss": 1.3332060813903808, "step": 120990 }, { "epoch": 0.9767280417813582, "grad_norm": 3.5720226764678955, "learning_rate": 4.659014725006664e-07, "loss": 2.6152034759521485, "step": 121000 }, { "epoch": 0.9768087631071253, "grad_norm": 4.150032043457031, "learning_rate": 4.642860027624533e-07, "loss": 2.288047981262207, "step": 121010 }, { "epoch": 0.9768894844328924, "grad_norm": 3.0062007904052734, "learning_rate": 4.6267053302424015e-07, "loss": 1.9835887908935548, "step": 121020 }, { "epoch": 0.9769702057586593, "grad_norm": 4.559480667114258, "learning_rate": 4.61055063286027e-07, "loss": 2.3840335845947265, "step": 121030 }, { "epoch": 0.9770509270844264, "grad_norm": 3.093107223510742, "learning_rate": 4.594395935478139e-07, "loss": 2.322421836853027, "step": 121040 }, { "epoch": 0.9771316484101935, "grad_norm": 3.222400188446045, "learning_rate": 4.5782412380960074e-07, "loss": 2.6934364318847654, "step": 121050 }, { "epoch": 0.9772123697359606, "grad_norm": 5.002413272857666, "learning_rate": 4.562086540713877e-07, "loss": 2.6353853225708006, "step": 121060 }, { "epoch": 0.9772930910617276, "grad_norm": 4.761448860168457, "learning_rate": 4.5459318433317454e-07, "loss": 2.05975284576416, "step": 121070 }, { "epoch": 0.9773738123874947, "grad_norm": 2.3533132076263428, "learning_rate": 4.529777145949614e-07, "loss": 2.190019989013672, "step": 121080 }, { "epoch": 0.9774545337132617, "grad_norm": 4.011369228363037, "learning_rate": 4.513622448567483e-07, "loss": 2.554614448547363, "step": 121090 }, { "epoch": 0.9775352550390287, "grad_norm": 4.7432098388671875, "learning_rate": 4.4974677511853513e-07, "loss": 2.2143152236938475, "step": 121100 }, { "epoch": 0.9776159763647958, "grad_norm": 3.990750789642334, "learning_rate": 4.48131305380322e-07, "loss": 2.1263729095458985, "step": 121110 }, { "epoch": 0.9776966976905629, "grad_norm": 5.909686088562012, "learning_rate": 4.4651583564210887e-07, "loss": 2.1621156692504884, "step": 121120 }, { "epoch": 0.97777741901633, "grad_norm": 2.661496639251709, "learning_rate": 4.449003659038957e-07, "loss": 2.4738433837890623, "step": 121130 }, { "epoch": 0.977858140342097, "grad_norm": 3.3406853675842285, "learning_rate": 4.432848961656826e-07, "loss": 2.2808404922485352, "step": 121140 }, { "epoch": 0.977938861667864, "grad_norm": 6.621272563934326, "learning_rate": 4.416694264274695e-07, "loss": 2.3152353286743166, "step": 121150 }, { "epoch": 0.9780195829936311, "grad_norm": 3.8819236755371094, "learning_rate": 4.4005395668925636e-07, "loss": 2.0222312927246096, "step": 121160 }, { "epoch": 0.9781003043193981, "grad_norm": 2.9984660148620605, "learning_rate": 4.384384869510432e-07, "loss": 2.062092971801758, "step": 121170 }, { "epoch": 0.9781810256451652, "grad_norm": 5.615437984466553, "learning_rate": 4.368230172128301e-07, "loss": 2.203137016296387, "step": 121180 }, { "epoch": 0.9782617469709323, "grad_norm": 2.8236594200134277, "learning_rate": 4.3520754747461695e-07, "loss": 1.7897510528564453, "step": 121190 }, { "epoch": 0.9783424682966994, "grad_norm": 4.380089282989502, "learning_rate": 4.335920777364038e-07, "loss": 2.565998649597168, "step": 121200 }, { "epoch": 0.9784231896224663, "grad_norm": 3.7847533226013184, "learning_rate": 4.319766079981907e-07, "loss": 2.150663375854492, "step": 121210 }, { "epoch": 0.9785039109482334, "grad_norm": 3.0893054008483887, "learning_rate": 4.303611382599776e-07, "loss": 2.367721366882324, "step": 121220 }, { "epoch": 0.9785846322740005, "grad_norm": 4.563976287841797, "learning_rate": 4.287456685217645e-07, "loss": 2.490047645568848, "step": 121230 }, { "epoch": 0.9786653535997675, "grad_norm": 4.121178150177002, "learning_rate": 4.2713019878355134e-07, "loss": 2.1864173889160154, "step": 121240 }, { "epoch": 0.9787460749255346, "grad_norm": 2.001781940460205, "learning_rate": 4.255147290453382e-07, "loss": 1.9780136108398438, "step": 121250 }, { "epoch": 0.9788267962513016, "grad_norm": 2.6401548385620117, "learning_rate": 4.2389925930712503e-07, "loss": 2.4070856094360353, "step": 121260 }, { "epoch": 0.9789075175770687, "grad_norm": 2.2106709480285645, "learning_rate": 4.2228378956891193e-07, "loss": 2.647271728515625, "step": 121270 }, { "epoch": 0.9789882389028357, "grad_norm": 3.294426202774048, "learning_rate": 4.206683198306988e-07, "loss": 2.3376264572143555, "step": 121280 }, { "epoch": 0.9790689602286028, "grad_norm": 3.9883410930633545, "learning_rate": 4.190528500924857e-07, "loss": 2.3227209091186523, "step": 121290 }, { "epoch": 0.9791496815543699, "grad_norm": 2.7185895442962646, "learning_rate": 4.1743738035427257e-07, "loss": 2.0347402572631834, "step": 121300 }, { "epoch": 0.9792304028801369, "grad_norm": 3.8197710514068604, "learning_rate": 4.158219106160594e-07, "loss": 2.0602954864501952, "step": 121310 }, { "epoch": 0.9793111242059039, "grad_norm": 3.629408836364746, "learning_rate": 4.142064408778463e-07, "loss": 2.320232391357422, "step": 121320 }, { "epoch": 0.979391845531671, "grad_norm": 4.080706596374512, "learning_rate": 4.1259097113963316e-07, "loss": 2.512705421447754, "step": 121330 }, { "epoch": 0.9794725668574381, "grad_norm": 3.6808183193206787, "learning_rate": 4.1097550140142e-07, "loss": 2.1449012756347656, "step": 121340 }, { "epoch": 0.9795532881832051, "grad_norm": 4.851232051849365, "learning_rate": 4.093600316632069e-07, "loss": 2.4208812713623047, "step": 121350 }, { "epoch": 0.9796340095089722, "grad_norm": 3.5181455612182617, "learning_rate": 4.0774456192499375e-07, "loss": 2.679098129272461, "step": 121360 }, { "epoch": 0.9797147308347393, "grad_norm": 3.4649085998535156, "learning_rate": 4.0612909218678065e-07, "loss": 2.219708061218262, "step": 121370 }, { "epoch": 0.9797954521605062, "grad_norm": 4.300656795501709, "learning_rate": 4.0451362244856755e-07, "loss": 2.5055633544921876, "step": 121380 }, { "epoch": 0.9798761734862733, "grad_norm": 3.721942186355591, "learning_rate": 4.028981527103544e-07, "loss": 2.347523498535156, "step": 121390 }, { "epoch": 0.9799568948120404, "grad_norm": 2.017512083053589, "learning_rate": 4.0128268297214124e-07, "loss": 2.380126190185547, "step": 121400 }, { "epoch": 0.9800376161378075, "grad_norm": 4.361588954925537, "learning_rate": 3.9966721323392814e-07, "loss": 2.7419593811035154, "step": 121410 }, { "epoch": 0.9801183374635745, "grad_norm": 2.648667335510254, "learning_rate": 3.98051743495715e-07, "loss": 2.1332578659057617, "step": 121420 }, { "epoch": 0.9801990587893415, "grad_norm": 3.745250701904297, "learning_rate": 3.9643627375750183e-07, "loss": 2.3456470489501955, "step": 121430 }, { "epoch": 0.9802797801151086, "grad_norm": 3.989223003387451, "learning_rate": 3.9482080401928873e-07, "loss": 1.9482088088989258, "step": 121440 }, { "epoch": 0.9803605014408757, "grad_norm": 3.1877496242523193, "learning_rate": 3.9320533428107563e-07, "loss": 2.2490230560302735, "step": 121450 }, { "epoch": 0.9804412227666427, "grad_norm": 3.303072929382324, "learning_rate": 3.9158986454286253e-07, "loss": 1.908052635192871, "step": 121460 }, { "epoch": 0.9805219440924098, "grad_norm": 2.8365914821624756, "learning_rate": 3.8997439480464937e-07, "loss": 2.3703401565551756, "step": 121470 }, { "epoch": 0.9806026654181769, "grad_norm": 5.280229091644287, "learning_rate": 3.883589250664362e-07, "loss": 2.4317564010620116, "step": 121480 }, { "epoch": 0.9806833867439438, "grad_norm": 3.2647545337677, "learning_rate": 3.867434553282231e-07, "loss": 2.399832344055176, "step": 121490 }, { "epoch": 0.9807641080697109, "grad_norm": 1.8750274181365967, "learning_rate": 3.8512798559000996e-07, "loss": 2.212497520446777, "step": 121500 }, { "epoch": 0.980844829395478, "grad_norm": 4.0841450691223145, "learning_rate": 3.835125158517968e-07, "loss": 2.2799150466918947, "step": 121510 }, { "epoch": 0.9809255507212451, "grad_norm": 5.459156036376953, "learning_rate": 3.8189704611358365e-07, "loss": 2.221553611755371, "step": 121520 }, { "epoch": 0.9810062720470121, "grad_norm": 4.576216697692871, "learning_rate": 3.802815763753706e-07, "loss": 2.442551612854004, "step": 121530 }, { "epoch": 0.9810869933727792, "grad_norm": 3.1854357719421387, "learning_rate": 3.7866610663715745e-07, "loss": 2.468537521362305, "step": 121540 }, { "epoch": 0.9811677146985462, "grad_norm": 2.1836533546447754, "learning_rate": 3.7705063689894435e-07, "loss": 2.1517372131347656, "step": 121550 }, { "epoch": 0.9812484360243132, "grad_norm": 4.588289260864258, "learning_rate": 3.754351671607312e-07, "loss": 2.308302116394043, "step": 121560 }, { "epoch": 0.9813291573500803, "grad_norm": 5.961582183837891, "learning_rate": 3.7381969742251804e-07, "loss": 2.3801223754882814, "step": 121570 }, { "epoch": 0.9814098786758474, "grad_norm": 2.0876903533935547, "learning_rate": 3.7220422768430494e-07, "loss": 1.9784894943237306, "step": 121580 }, { "epoch": 0.9814906000016145, "grad_norm": 4.3310136795043945, "learning_rate": 3.705887579460918e-07, "loss": 2.638733673095703, "step": 121590 }, { "epoch": 0.9815713213273815, "grad_norm": 2.443319797515869, "learning_rate": 3.6897328820787863e-07, "loss": 2.434798240661621, "step": 121600 }, { "epoch": 0.9816520426531485, "grad_norm": 2.625988006591797, "learning_rate": 3.673578184696656e-07, "loss": 2.7447681427001953, "step": 121610 }, { "epoch": 0.9817327639789156, "grad_norm": 3.506972074508667, "learning_rate": 3.6574234873145243e-07, "loss": 2.462162399291992, "step": 121620 }, { "epoch": 0.9818134853046826, "grad_norm": 3.5824978351593018, "learning_rate": 3.641268789932393e-07, "loss": 2.3427751541137694, "step": 121630 }, { "epoch": 0.9818942066304497, "grad_norm": 2.015434980392456, "learning_rate": 3.6251140925502617e-07, "loss": 2.2188411712646485, "step": 121640 }, { "epoch": 0.9819749279562168, "grad_norm": 1.8296748399734497, "learning_rate": 3.60895939516813e-07, "loss": 1.7697002410888671, "step": 121650 }, { "epoch": 0.9820556492819839, "grad_norm": 4.094779014587402, "learning_rate": 3.5928046977859986e-07, "loss": 2.189277458190918, "step": 121660 }, { "epoch": 0.9821363706077508, "grad_norm": 3.53430438041687, "learning_rate": 3.5766500004038676e-07, "loss": 2.1231052398681642, "step": 121670 }, { "epoch": 0.9822170919335179, "grad_norm": 3.3379366397857666, "learning_rate": 3.560495303021736e-07, "loss": 2.4204170227050783, "step": 121680 }, { "epoch": 0.982297813259285, "grad_norm": 5.876993656158447, "learning_rate": 3.5443406056396056e-07, "loss": 2.5095821380615235, "step": 121690 }, { "epoch": 0.982378534585052, "grad_norm": 3.6768569946289062, "learning_rate": 3.528185908257474e-07, "loss": 2.0968013763427735, "step": 121700 }, { "epoch": 0.9824592559108191, "grad_norm": 4.118659019470215, "learning_rate": 3.5120312108753425e-07, "loss": 2.6794418334960937, "step": 121710 }, { "epoch": 0.9825399772365861, "grad_norm": 4.059993267059326, "learning_rate": 3.4958765134932115e-07, "loss": 2.5005874633789062, "step": 121720 }, { "epoch": 0.9826206985623532, "grad_norm": 2.8272769451141357, "learning_rate": 3.47972181611108e-07, "loss": 2.4265157699584963, "step": 121730 }, { "epoch": 0.9827014198881202, "grad_norm": 3.3595471382141113, "learning_rate": 3.4635671187289484e-07, "loss": 2.2878841400146483, "step": 121740 }, { "epoch": 0.9827821412138873, "grad_norm": 3.3184354305267334, "learning_rate": 3.4474124213468174e-07, "loss": 2.4848398208618163, "step": 121750 }, { "epoch": 0.9828628625396544, "grad_norm": 4.940225601196289, "learning_rate": 3.4312577239646864e-07, "loss": 2.08499755859375, "step": 121760 }, { "epoch": 0.9829435838654214, "grad_norm": 4.028210639953613, "learning_rate": 3.415103026582555e-07, "loss": 2.3046064376831055, "step": 121770 }, { "epoch": 0.9830243051911884, "grad_norm": 2.5724525451660156, "learning_rate": 3.398948329200424e-07, "loss": 2.048031806945801, "step": 121780 }, { "epoch": 0.9831050265169555, "grad_norm": 2.3806838989257812, "learning_rate": 3.3827936318182923e-07, "loss": 2.15484561920166, "step": 121790 }, { "epoch": 0.9831857478427226, "grad_norm": 3.847503423690796, "learning_rate": 3.366638934436161e-07, "loss": 2.674347686767578, "step": 121800 }, { "epoch": 0.9832664691684896, "grad_norm": 3.3696489334106445, "learning_rate": 3.3504842370540297e-07, "loss": 1.9126449584960938, "step": 121810 }, { "epoch": 0.9833471904942567, "grad_norm": 3.8870115280151367, "learning_rate": 3.334329539671898e-07, "loss": 1.94371337890625, "step": 121820 }, { "epoch": 0.9834279118200238, "grad_norm": 3.325859785079956, "learning_rate": 3.3181748422897666e-07, "loss": 2.2228511810302733, "step": 121830 }, { "epoch": 0.9835086331457907, "grad_norm": 5.634443283081055, "learning_rate": 3.302020144907636e-07, "loss": 2.7546005249023438, "step": 121840 }, { "epoch": 0.9835893544715578, "grad_norm": 4.786739826202393, "learning_rate": 3.2858654475255046e-07, "loss": 2.178575897216797, "step": 121850 }, { "epoch": 0.9836700757973249, "grad_norm": 2.579284906387329, "learning_rate": 3.2697107501433736e-07, "loss": 2.148061752319336, "step": 121860 }, { "epoch": 0.983750797123092, "grad_norm": 8.146512985229492, "learning_rate": 3.253556052761242e-07, "loss": 2.5017059326171873, "step": 121870 }, { "epoch": 0.983831518448859, "grad_norm": 3.99482798576355, "learning_rate": 3.2374013553791105e-07, "loss": 1.934211540222168, "step": 121880 }, { "epoch": 0.983912239774626, "grad_norm": 2.7222442626953125, "learning_rate": 3.221246657996979e-07, "loss": 2.5090442657470704, "step": 121890 }, { "epoch": 0.9839929611003931, "grad_norm": 2.752408981323242, "learning_rate": 3.205091960614848e-07, "loss": 2.1275129318237305, "step": 121900 }, { "epoch": 0.9840736824261601, "grad_norm": 3.2433207035064697, "learning_rate": 3.1889372632327164e-07, "loss": 2.380642127990723, "step": 121910 }, { "epoch": 0.9841544037519272, "grad_norm": 2.942284107208252, "learning_rate": 3.172782565850586e-07, "loss": 2.2664052963256838, "step": 121920 }, { "epoch": 0.9842351250776943, "grad_norm": 2.705061197280884, "learning_rate": 3.1566278684684544e-07, "loss": 2.097221565246582, "step": 121930 }, { "epoch": 0.9843158464034614, "grad_norm": 4.908243656158447, "learning_rate": 3.140473171086323e-07, "loss": 2.074849510192871, "step": 121940 }, { "epoch": 0.9843965677292283, "grad_norm": 8.21194839477539, "learning_rate": 3.124318473704192e-07, "loss": 2.0865964889526367, "step": 121950 }, { "epoch": 0.9844772890549954, "grad_norm": 3.3916707038879395, "learning_rate": 3.1081637763220603e-07, "loss": 2.4972679138183596, "step": 121960 }, { "epoch": 0.9845580103807625, "grad_norm": 3.112501382827759, "learning_rate": 3.092009078939929e-07, "loss": 2.298626708984375, "step": 121970 }, { "epoch": 0.9846387317065296, "grad_norm": 4.384444236755371, "learning_rate": 3.0758543815577977e-07, "loss": 2.288152503967285, "step": 121980 }, { "epoch": 0.9847194530322966, "grad_norm": 2.989250659942627, "learning_rate": 3.0596996841756667e-07, "loss": 2.381494331359863, "step": 121990 }, { "epoch": 0.9848001743580637, "grad_norm": 5.862224102020264, "learning_rate": 3.043544986793535e-07, "loss": 2.5236268997192384, "step": 122000 }, { "epoch": 0.9848808956838307, "grad_norm": 3.6772220134735107, "learning_rate": 3.0273902894114036e-07, "loss": 2.395573043823242, "step": 122010 }, { "epoch": 0.9849616170095977, "grad_norm": 3.2008159160614014, "learning_rate": 3.0112355920292726e-07, "loss": 2.409359931945801, "step": 122020 }, { "epoch": 0.9850423383353648, "grad_norm": 1.8807604312896729, "learning_rate": 2.995080894647141e-07, "loss": 2.2389499664306642, "step": 122030 }, { "epoch": 0.9851230596611319, "grad_norm": 2.9187541007995605, "learning_rate": 2.97892619726501e-07, "loss": 2.4482757568359377, "step": 122040 }, { "epoch": 0.985203780986899, "grad_norm": 3.598111629486084, "learning_rate": 2.9627714998828785e-07, "loss": 2.3568159103393556, "step": 122050 }, { "epoch": 0.985284502312666, "grad_norm": 7.310243606567383, "learning_rate": 2.9466168025007475e-07, "loss": 2.525189971923828, "step": 122060 }, { "epoch": 0.985365223638433, "grad_norm": 2.46049165725708, "learning_rate": 2.930462105118616e-07, "loss": 2.4260076522827148, "step": 122070 }, { "epoch": 0.9854459449642001, "grad_norm": 2.8368442058563232, "learning_rate": 2.914307407736485e-07, "loss": 2.1880161285400392, "step": 122080 }, { "epoch": 0.9855266662899671, "grad_norm": 2.4390933513641357, "learning_rate": 2.8981527103543534e-07, "loss": 2.3133230209350586, "step": 122090 }, { "epoch": 0.9856073876157342, "grad_norm": 3.59039306640625, "learning_rate": 2.8819980129722224e-07, "loss": 1.9908447265625, "step": 122100 }, { "epoch": 0.9856881089415013, "grad_norm": 2.5977795124053955, "learning_rate": 2.865843315590091e-07, "loss": 2.3410030364990235, "step": 122110 }, { "epoch": 0.9857688302672684, "grad_norm": 3.614798069000244, "learning_rate": 2.84968861820796e-07, "loss": 2.489667510986328, "step": 122120 }, { "epoch": 0.9858495515930353, "grad_norm": 4.552616119384766, "learning_rate": 2.8335339208258283e-07, "loss": 1.6136775970458985, "step": 122130 }, { "epoch": 0.9859302729188024, "grad_norm": 3.8151516914367676, "learning_rate": 2.8173792234436973e-07, "loss": 2.180820846557617, "step": 122140 }, { "epoch": 0.9860109942445695, "grad_norm": 1.9819755554199219, "learning_rate": 2.801224526061566e-07, "loss": 1.6506141662597655, "step": 122150 }, { "epoch": 0.9860917155703365, "grad_norm": 4.978813171386719, "learning_rate": 2.785069828679434e-07, "loss": 2.0624923706054688, "step": 122160 }, { "epoch": 0.9861724368961036, "grad_norm": 2.198214530944824, "learning_rate": 2.768915131297303e-07, "loss": 1.9396188735961915, "step": 122170 }, { "epoch": 0.9862531582218707, "grad_norm": 5.034679889678955, "learning_rate": 2.752760433915172e-07, "loss": 2.172905921936035, "step": 122180 }, { "epoch": 0.9863338795476377, "grad_norm": 3.5800983905792236, "learning_rate": 2.7366057365330406e-07, "loss": 2.2026641845703123, "step": 122190 }, { "epoch": 0.9864146008734047, "grad_norm": 5.3917131423950195, "learning_rate": 2.720451039150909e-07, "loss": 2.3199304580688476, "step": 122200 }, { "epoch": 0.9864953221991718, "grad_norm": 2.8092222213745117, "learning_rate": 2.704296341768778e-07, "loss": 2.467909812927246, "step": 122210 }, { "epoch": 0.9865760435249389, "grad_norm": 2.214311122894287, "learning_rate": 2.688141644386647e-07, "loss": 2.1322708129882812, "step": 122220 }, { "epoch": 0.9866567648507059, "grad_norm": 2.7324931621551514, "learning_rate": 2.6719869470045155e-07, "loss": 2.691379928588867, "step": 122230 }, { "epoch": 0.9867374861764729, "grad_norm": 4.051777362823486, "learning_rate": 2.655832249622384e-07, "loss": 2.4027328491210938, "step": 122240 }, { "epoch": 0.98681820750224, "grad_norm": 7.17533540725708, "learning_rate": 2.639677552240253e-07, "loss": 2.846650505065918, "step": 122250 }, { "epoch": 0.9868989288280071, "grad_norm": 2.017076253890991, "learning_rate": 2.6235228548581214e-07, "loss": 2.564234161376953, "step": 122260 }, { "epoch": 0.9869796501537741, "grad_norm": 3.251319408416748, "learning_rate": 2.6073681574759904e-07, "loss": 2.590239715576172, "step": 122270 }, { "epoch": 0.9870603714795412, "grad_norm": 3.9757800102233887, "learning_rate": 2.591213460093859e-07, "loss": 2.365122604370117, "step": 122280 }, { "epoch": 0.9871410928053083, "grad_norm": 3.8685333728790283, "learning_rate": 2.575058762711728e-07, "loss": 1.7758295059204101, "step": 122290 }, { "epoch": 0.9872218141310752, "grad_norm": 3.4145240783691406, "learning_rate": 2.5589040653295963e-07, "loss": 2.680069923400879, "step": 122300 }, { "epoch": 0.9873025354568423, "grad_norm": 2.835090398788452, "learning_rate": 2.5427493679474653e-07, "loss": 2.0443889617919924, "step": 122310 }, { "epoch": 0.9873832567826094, "grad_norm": 3.275463342666626, "learning_rate": 2.526594670565334e-07, "loss": 1.8447790145874023, "step": 122320 }, { "epoch": 0.9874639781083765, "grad_norm": 4.458674430847168, "learning_rate": 2.5104399731832027e-07, "loss": 2.140715217590332, "step": 122330 }, { "epoch": 0.9875446994341435, "grad_norm": 3.2239301204681396, "learning_rate": 2.494285275801071e-07, "loss": 2.5535276412963865, "step": 122340 }, { "epoch": 0.9876254207599106, "grad_norm": 3.1358816623687744, "learning_rate": 2.47813057841894e-07, "loss": 2.1457534790039063, "step": 122350 }, { "epoch": 0.9877061420856776, "grad_norm": 2.793811082839966, "learning_rate": 2.4619758810368086e-07, "loss": 2.5275407791137696, "step": 122360 }, { "epoch": 0.9877868634114446, "grad_norm": 2.2705588340759277, "learning_rate": 2.4458211836546776e-07, "loss": 2.3349870681762694, "step": 122370 }, { "epoch": 0.9878675847372117, "grad_norm": 2.3866183757781982, "learning_rate": 2.429666486272546e-07, "loss": 2.01202449798584, "step": 122380 }, { "epoch": 0.9879483060629788, "grad_norm": 2.9839847087860107, "learning_rate": 2.4135117888904145e-07, "loss": 2.4134653091430662, "step": 122390 }, { "epoch": 0.9880290273887459, "grad_norm": 3.5697309970855713, "learning_rate": 2.3973570915082835e-07, "loss": 2.394447135925293, "step": 122400 }, { "epoch": 0.9881097487145128, "grad_norm": 3.4058034420013428, "learning_rate": 2.3812023941261525e-07, "loss": 1.9327953338623047, "step": 122410 }, { "epoch": 0.9881904700402799, "grad_norm": 3.4283177852630615, "learning_rate": 2.365047696744021e-07, "loss": 1.684153938293457, "step": 122420 }, { "epoch": 0.988271191366047, "grad_norm": 3.9185869693756104, "learning_rate": 2.3488929993618897e-07, "loss": 2.257475471496582, "step": 122430 }, { "epoch": 0.9883519126918141, "grad_norm": 2.758385419845581, "learning_rate": 2.3327383019797581e-07, "loss": 2.1623626708984376, "step": 122440 }, { "epoch": 0.9884326340175811, "grad_norm": 7.0714240074157715, "learning_rate": 2.316583604597627e-07, "loss": 3.29949951171875, "step": 122450 }, { "epoch": 0.9885133553433482, "grad_norm": 2.4385628700256348, "learning_rate": 2.3004289072154958e-07, "loss": 2.0563961029052735, "step": 122460 }, { "epoch": 0.9885940766691153, "grad_norm": 4.876952648162842, "learning_rate": 2.2842742098333646e-07, "loss": 2.466389846801758, "step": 122470 }, { "epoch": 0.9886747979948822, "grad_norm": 2.6079108715057373, "learning_rate": 2.268119512451233e-07, "loss": 2.077536392211914, "step": 122480 }, { "epoch": 0.9887555193206493, "grad_norm": 3.620812177658081, "learning_rate": 2.251964815069102e-07, "loss": 2.3216011047363283, "step": 122490 }, { "epoch": 0.9888362406464164, "grad_norm": 3.1797561645507812, "learning_rate": 2.2358101176869707e-07, "loss": 2.6690671920776365, "step": 122500 }, { "epoch": 0.9889169619721835, "grad_norm": 2.6725544929504395, "learning_rate": 2.2196554203048392e-07, "loss": 2.451540946960449, "step": 122510 }, { "epoch": 0.9889976832979505, "grad_norm": 3.7915425300598145, "learning_rate": 2.203500722922708e-07, "loss": 2.6464027404785155, "step": 122520 }, { "epoch": 0.9890784046237175, "grad_norm": 3.3251333236694336, "learning_rate": 2.187346025540577e-07, "loss": 2.1993227005004883, "step": 122530 }, { "epoch": 0.9891591259494846, "grad_norm": 3.3069381713867188, "learning_rate": 2.1711913281584453e-07, "loss": 1.8262504577636718, "step": 122540 }, { "epoch": 0.9892398472752516, "grad_norm": 2.0268263816833496, "learning_rate": 2.155036630776314e-07, "loss": 2.5506372451782227, "step": 122550 }, { "epoch": 0.9893205686010187, "grad_norm": 3.214111804962158, "learning_rate": 2.1388819333941828e-07, "loss": 2.915431022644043, "step": 122560 }, { "epoch": 0.9894012899267858, "grad_norm": 3.354994297027588, "learning_rate": 2.1227272360120518e-07, "loss": 3.008401107788086, "step": 122570 }, { "epoch": 0.9894820112525529, "grad_norm": 2.5327308177948, "learning_rate": 2.1065725386299202e-07, "loss": 2.0602155685424806, "step": 122580 }, { "epoch": 0.9895627325783198, "grad_norm": 4.035093784332275, "learning_rate": 2.090417841247789e-07, "loss": 2.3113908767700195, "step": 122590 }, { "epoch": 0.9896434539040869, "grad_norm": 2.9586658477783203, "learning_rate": 2.0742631438656577e-07, "loss": 2.4276424407958985, "step": 122600 }, { "epoch": 0.989724175229854, "grad_norm": 1.954433798789978, "learning_rate": 2.0581084464835264e-07, "loss": 2.1448877334594725, "step": 122610 }, { "epoch": 0.989804896555621, "grad_norm": 2.208998441696167, "learning_rate": 2.041953749101395e-07, "loss": 2.306549072265625, "step": 122620 }, { "epoch": 0.9898856178813881, "grad_norm": 3.6981277465820312, "learning_rate": 2.0257990517192638e-07, "loss": 2.383828353881836, "step": 122630 }, { "epoch": 0.9899663392071552, "grad_norm": 3.0348427295684814, "learning_rate": 2.0096443543371323e-07, "loss": 2.927728271484375, "step": 122640 }, { "epoch": 0.9900470605329222, "grad_norm": 4.9063215255737305, "learning_rate": 1.9934896569550013e-07, "loss": 2.1723300933837892, "step": 122650 }, { "epoch": 0.9901277818586892, "grad_norm": 5.147653579711914, "learning_rate": 1.97733495957287e-07, "loss": 2.3914348602294924, "step": 122660 }, { "epoch": 0.9902085031844563, "grad_norm": 2.251275062561035, "learning_rate": 1.9611802621907385e-07, "loss": 2.3245723724365233, "step": 122670 }, { "epoch": 0.9902892245102234, "grad_norm": 3.31821346282959, "learning_rate": 1.9450255648086072e-07, "loss": 2.1042207717895507, "step": 122680 }, { "epoch": 0.9903699458359904, "grad_norm": 2.3293731212615967, "learning_rate": 1.9288708674264762e-07, "loss": 2.106940269470215, "step": 122690 }, { "epoch": 0.9904506671617574, "grad_norm": 3.795203924179077, "learning_rate": 1.912716170044345e-07, "loss": 2.0917360305786135, "step": 122700 }, { "epoch": 0.9905313884875245, "grad_norm": 3.7959303855895996, "learning_rate": 1.8965614726622134e-07, "loss": 1.8900360107421874, "step": 122710 }, { "epoch": 0.9906121098132916, "grad_norm": 3.438849925994873, "learning_rate": 1.8804067752800823e-07, "loss": 2.674663543701172, "step": 122720 }, { "epoch": 0.9906928311390586, "grad_norm": 1.991312861442566, "learning_rate": 1.864252077897951e-07, "loss": 2.1148025512695314, "step": 122730 }, { "epoch": 0.9907735524648257, "grad_norm": 3.4705324172973633, "learning_rate": 1.8480973805158195e-07, "loss": 2.305454063415527, "step": 122740 }, { "epoch": 0.9908542737905928, "grad_norm": 5.036088943481445, "learning_rate": 1.8319426831336882e-07, "loss": 2.367814636230469, "step": 122750 }, { "epoch": 0.9909349951163597, "grad_norm": 3.8859243392944336, "learning_rate": 1.8157879857515572e-07, "loss": 2.154405975341797, "step": 122760 }, { "epoch": 0.9910157164421268, "grad_norm": 2.3828296661376953, "learning_rate": 1.799633288369426e-07, "loss": 2.7351959228515623, "step": 122770 }, { "epoch": 0.9910964377678939, "grad_norm": 4.135331153869629, "learning_rate": 1.7834785909872944e-07, "loss": 2.2029956817626952, "step": 122780 }, { "epoch": 0.991177159093661, "grad_norm": 3.9308652877807617, "learning_rate": 1.767323893605163e-07, "loss": 2.0947986602783204, "step": 122790 }, { "epoch": 0.991257880419428, "grad_norm": 3.2482120990753174, "learning_rate": 1.751169196223032e-07, "loss": 2.6129627227783203, "step": 122800 }, { "epoch": 0.9913386017451951, "grad_norm": 3.0073561668395996, "learning_rate": 1.7350144988409006e-07, "loss": 2.312267303466797, "step": 122810 }, { "epoch": 0.9914193230709621, "grad_norm": 7.471670627593994, "learning_rate": 1.7188598014587693e-07, "loss": 2.9290481567382813, "step": 122820 }, { "epoch": 0.9915000443967291, "grad_norm": 3.6292002201080322, "learning_rate": 1.702705104076638e-07, "loss": 2.3660287857055664, "step": 122830 }, { "epoch": 0.9915807657224962, "grad_norm": 1.9351534843444824, "learning_rate": 1.686550406694507e-07, "loss": 1.8747791290283202, "step": 122840 }, { "epoch": 0.9916614870482633, "grad_norm": 2.806199550628662, "learning_rate": 1.6703957093123755e-07, "loss": 2.348044013977051, "step": 122850 }, { "epoch": 0.9917422083740304, "grad_norm": 3.589801549911499, "learning_rate": 1.6542410119302442e-07, "loss": 2.1170269012451173, "step": 122860 }, { "epoch": 0.9918229296997974, "grad_norm": 4.787539958953857, "learning_rate": 1.6380863145481126e-07, "loss": 2.2439859390258787, "step": 122870 }, { "epoch": 0.9919036510255644, "grad_norm": 4.004039287567139, "learning_rate": 1.6219316171659816e-07, "loss": 2.069259262084961, "step": 122880 }, { "epoch": 0.9919843723513315, "grad_norm": 3.8745601177215576, "learning_rate": 1.6057769197838503e-07, "loss": 2.5699323654174804, "step": 122890 }, { "epoch": 0.9920650936770985, "grad_norm": 3.1776986122131348, "learning_rate": 1.589622222401719e-07, "loss": 2.400651741027832, "step": 122900 }, { "epoch": 0.9921458150028656, "grad_norm": 4.268596172332764, "learning_rate": 1.5734675250195875e-07, "loss": 2.1019638061523436, "step": 122910 }, { "epoch": 0.9922265363286327, "grad_norm": 4.127091884613037, "learning_rate": 1.5573128276374562e-07, "loss": 2.141460418701172, "step": 122920 }, { "epoch": 0.9923072576543998, "grad_norm": 3.907372236251831, "learning_rate": 1.5411581302553252e-07, "loss": 1.783155632019043, "step": 122930 }, { "epoch": 0.9923879789801667, "grad_norm": 3.5312535762786865, "learning_rate": 1.5250034328731937e-07, "loss": 2.351718711853027, "step": 122940 }, { "epoch": 0.9924687003059338, "grad_norm": 3.1614418029785156, "learning_rate": 1.5088487354910627e-07, "loss": 2.084621810913086, "step": 122950 }, { "epoch": 0.9925494216317009, "grad_norm": 3.6417293548583984, "learning_rate": 1.492694038108931e-07, "loss": 2.5338531494140626, "step": 122960 }, { "epoch": 0.992630142957468, "grad_norm": 4.226717948913574, "learning_rate": 1.4765393407268e-07, "loss": 2.4262763977050783, "step": 122970 }, { "epoch": 0.992710864283235, "grad_norm": 3.8270063400268555, "learning_rate": 1.4603846433446686e-07, "loss": 2.1003042221069337, "step": 122980 }, { "epoch": 0.992791585609002, "grad_norm": 3.8761703968048096, "learning_rate": 1.4442299459625373e-07, "loss": 2.1686481475830077, "step": 122990 }, { "epoch": 0.9928723069347691, "grad_norm": 2.3394527435302734, "learning_rate": 1.428075248580406e-07, "loss": 2.171636390686035, "step": 123000 }, { "epoch": 0.9929530282605361, "grad_norm": 2.175706624984741, "learning_rate": 1.4119205511982747e-07, "loss": 2.4736600875854493, "step": 123010 }, { "epoch": 0.9930337495863032, "grad_norm": 3.292050838470459, "learning_rate": 1.3957658538161435e-07, "loss": 2.4157949447631837, "step": 123020 }, { "epoch": 0.9931144709120703, "grad_norm": 3.0009560585021973, "learning_rate": 1.3796111564340122e-07, "loss": 1.7468332290649413, "step": 123030 }, { "epoch": 0.9931951922378374, "grad_norm": 1.9539345502853394, "learning_rate": 1.363456459051881e-07, "loss": 2.5281984329223635, "step": 123040 }, { "epoch": 0.9932759135636043, "grad_norm": 2.6638331413269043, "learning_rate": 1.3473017616697496e-07, "loss": 2.763505744934082, "step": 123050 }, { "epoch": 0.9933566348893714, "grad_norm": 5.944690704345703, "learning_rate": 1.3311470642876183e-07, "loss": 1.949333953857422, "step": 123060 }, { "epoch": 0.9934373562151385, "grad_norm": 3.419670820236206, "learning_rate": 1.314992366905487e-07, "loss": 2.369395446777344, "step": 123070 }, { "epoch": 0.9935180775409055, "grad_norm": 2.9831717014312744, "learning_rate": 1.2988376695233558e-07, "loss": 2.2484762191772463, "step": 123080 }, { "epoch": 0.9935987988666726, "grad_norm": 3.402233839035034, "learning_rate": 1.2826829721412245e-07, "loss": 2.720166778564453, "step": 123090 }, { "epoch": 0.9936795201924397, "grad_norm": 1.9478737115859985, "learning_rate": 1.2665282747590932e-07, "loss": 2.255328369140625, "step": 123100 }, { "epoch": 0.9937602415182067, "grad_norm": 2.5876355171203613, "learning_rate": 1.250373577376962e-07, "loss": 2.0491748809814454, "step": 123110 }, { "epoch": 0.9938409628439737, "grad_norm": 5.022403240203857, "learning_rate": 1.2342188799948304e-07, "loss": 1.8750860214233398, "step": 123120 }, { "epoch": 0.9939216841697408, "grad_norm": 3.5161170959472656, "learning_rate": 1.2180641826126994e-07, "loss": 2.5170356750488283, "step": 123130 }, { "epoch": 0.9940024054955079, "grad_norm": 4.850154876708984, "learning_rate": 1.2019094852305679e-07, "loss": 2.4215023040771486, "step": 123140 }, { "epoch": 0.9940831268212749, "grad_norm": 2.742838144302368, "learning_rate": 1.1857547878484367e-07, "loss": 2.318024444580078, "step": 123150 }, { "epoch": 0.994163848147042, "grad_norm": 2.8478527069091797, "learning_rate": 1.1696000904663053e-07, "loss": 2.5822214126586913, "step": 123160 }, { "epoch": 0.994244569472809, "grad_norm": 2.758666753768921, "learning_rate": 1.1534453930841741e-07, "loss": 1.8622339248657227, "step": 123170 }, { "epoch": 0.9943252907985761, "grad_norm": 2.941612482070923, "learning_rate": 1.1372906957020427e-07, "loss": 2.4245302200317385, "step": 123180 }, { "epoch": 0.9944060121243431, "grad_norm": 3.364393711090088, "learning_rate": 1.1211359983199116e-07, "loss": 1.880063819885254, "step": 123190 }, { "epoch": 0.9944867334501102, "grad_norm": 3.5967698097229004, "learning_rate": 1.1049813009377803e-07, "loss": 2.7359619140625, "step": 123200 }, { "epoch": 0.9945674547758773, "grad_norm": 3.834684371948242, "learning_rate": 1.088826603555649e-07, "loss": 2.0814235687255858, "step": 123210 }, { "epoch": 0.9946481761016442, "grad_norm": 2.9017868041992188, "learning_rate": 1.0726719061735178e-07, "loss": 2.0342790603637697, "step": 123220 }, { "epoch": 0.9947288974274113, "grad_norm": 4.508266448974609, "learning_rate": 1.0565172087913863e-07, "loss": 2.6332420349121093, "step": 123230 }, { "epoch": 0.9948096187531784, "grad_norm": 2.4965431690216064, "learning_rate": 1.0403625114092552e-07, "loss": 2.4132442474365234, "step": 123240 }, { "epoch": 0.9948903400789455, "grad_norm": 3.614845037460327, "learning_rate": 1.0242078140271238e-07, "loss": 2.2583688735961913, "step": 123250 }, { "epoch": 0.9949710614047125, "grad_norm": 3.2638185024261475, "learning_rate": 1.0080531166449926e-07, "loss": 2.429792022705078, "step": 123260 }, { "epoch": 0.9950517827304796, "grad_norm": 4.583664417266846, "learning_rate": 9.918984192628612e-08, "loss": 3.0429803848266603, "step": 123270 }, { "epoch": 0.9951325040562466, "grad_norm": 4.890990734100342, "learning_rate": 9.757437218807301e-08, "loss": 2.183029556274414, "step": 123280 }, { "epoch": 0.9952132253820136, "grad_norm": 4.299148082733154, "learning_rate": 9.595890244985987e-08, "loss": 3.2678966522216797, "step": 123290 }, { "epoch": 0.9952939467077807, "grad_norm": 3.9766147136688232, "learning_rate": 9.434343271164674e-08, "loss": 2.2120639801025392, "step": 123300 }, { "epoch": 0.9953746680335478, "grad_norm": 5.768679141998291, "learning_rate": 9.272796297343361e-08, "loss": 2.5122297286987303, "step": 123310 }, { "epoch": 0.9954553893593149, "grad_norm": 3.061546802520752, "learning_rate": 9.111249323522048e-08, "loss": 2.2921756744384765, "step": 123320 }, { "epoch": 0.9955361106850819, "grad_norm": 3.3325841426849365, "learning_rate": 8.949702349700734e-08, "loss": 2.485970687866211, "step": 123330 }, { "epoch": 0.9956168320108489, "grad_norm": 3.2814600467681885, "learning_rate": 8.788155375879423e-08, "loss": 2.2610883712768555, "step": 123340 }, { "epoch": 0.995697553336616, "grad_norm": 4.276379108428955, "learning_rate": 8.626608402058109e-08, "loss": 2.3124664306640623, "step": 123350 }, { "epoch": 0.995778274662383, "grad_norm": 4.223052978515625, "learning_rate": 8.465061428236797e-08, "loss": 2.751336097717285, "step": 123360 }, { "epoch": 0.9958589959881501, "grad_norm": 4.545749187469482, "learning_rate": 8.303514454415483e-08, "loss": 3.1156492233276367, "step": 123370 }, { "epoch": 0.9959397173139172, "grad_norm": 3.772850751876831, "learning_rate": 8.14196748059417e-08, "loss": 2.344375801086426, "step": 123380 }, { "epoch": 0.9960204386396843, "grad_norm": 3.2258315086364746, "learning_rate": 7.980420506772858e-08, "loss": 2.1933176040649416, "step": 123390 }, { "epoch": 0.9961011599654512, "grad_norm": 3.8003733158111572, "learning_rate": 7.818873532951545e-08, "loss": 2.0014408111572264, "step": 123400 }, { "epoch": 0.9961818812912183, "grad_norm": 2.5725796222686768, "learning_rate": 7.657326559130231e-08, "loss": 2.4813186645507814, "step": 123410 }, { "epoch": 0.9962626026169854, "grad_norm": 4.026156902313232, "learning_rate": 7.495779585308918e-08, "loss": 2.3824489593505858, "step": 123420 }, { "epoch": 0.9963433239427525, "grad_norm": 4.044978141784668, "learning_rate": 7.334232611487605e-08, "loss": 2.1642488479614257, "step": 123430 }, { "epoch": 0.9964240452685195, "grad_norm": 3.4300098419189453, "learning_rate": 7.172685637666292e-08, "loss": 2.309057426452637, "step": 123440 }, { "epoch": 0.9965047665942866, "grad_norm": 2.4526243209838867, "learning_rate": 7.011138663844981e-08, "loss": 2.2458091735839845, "step": 123450 }, { "epoch": 0.9965854879200536, "grad_norm": 3.4196507930755615, "learning_rate": 6.849591690023668e-08, "loss": 2.6488019943237306, "step": 123460 }, { "epoch": 0.9966662092458206, "grad_norm": 2.4031336307525635, "learning_rate": 6.688044716202354e-08, "loss": 2.5559099197387694, "step": 123470 }, { "epoch": 0.9967469305715877, "grad_norm": 2.434143304824829, "learning_rate": 6.526497742381041e-08, "loss": 2.0780414581298827, "step": 123480 }, { "epoch": 0.9968276518973548, "grad_norm": 3.318486213684082, "learning_rate": 6.364950768559728e-08, "loss": 2.6545175552368163, "step": 123490 }, { "epoch": 0.9969083732231219, "grad_norm": 4.697328567504883, "learning_rate": 6.203403794738416e-08, "loss": 2.5001522064208985, "step": 123500 }, { "epoch": 0.9969890945488888, "grad_norm": 2.119474411010742, "learning_rate": 6.041856820917103e-08, "loss": 2.2449014663696287, "step": 123510 }, { "epoch": 0.9970698158746559, "grad_norm": 5.141426086425781, "learning_rate": 5.88030984709579e-08, "loss": 2.219781684875488, "step": 123520 }, { "epoch": 0.997150537200423, "grad_norm": 2.894793748855591, "learning_rate": 5.718762873274477e-08, "loss": 2.2302608489990234, "step": 123530 }, { "epoch": 0.99723125852619, "grad_norm": 5.372074604034424, "learning_rate": 5.5572158994531645e-08, "loss": 1.9180145263671875, "step": 123540 }, { "epoch": 0.9973119798519571, "grad_norm": 2.8231711387634277, "learning_rate": 5.395668925631851e-08, "loss": 2.3270843505859373, "step": 123550 }, { "epoch": 0.9973927011777242, "grad_norm": 2.5593149662017822, "learning_rate": 5.234121951810538e-08, "loss": 2.8152496337890627, "step": 123560 }, { "epoch": 0.9974734225034912, "grad_norm": 4.935564994812012, "learning_rate": 5.0725749779892255e-08, "loss": 2.3483394622802733, "step": 123570 }, { "epoch": 0.9975541438292582, "grad_norm": 3.393162965774536, "learning_rate": 4.911028004167913e-08, "loss": 2.194661521911621, "step": 123580 }, { "epoch": 0.9976348651550253, "grad_norm": 4.120192050933838, "learning_rate": 4.749481030346599e-08, "loss": 2.3667781829833983, "step": 123590 }, { "epoch": 0.9977155864807924, "grad_norm": 4.493199825286865, "learning_rate": 4.5879340565252865e-08, "loss": 2.3430574417114256, "step": 123600 }, { "epoch": 0.9977963078065594, "grad_norm": 3.2735705375671387, "learning_rate": 4.426387082703974e-08, "loss": 1.9309993743896485, "step": 123610 }, { "epoch": 0.9978770291323265, "grad_norm": 2.634586811065674, "learning_rate": 4.264840108882661e-08, "loss": 2.838140869140625, "step": 123620 }, { "epoch": 0.9979577504580935, "grad_norm": 4.296531677246094, "learning_rate": 4.103293135061348e-08, "loss": 2.1471979141235353, "step": 123630 }, { "epoch": 0.9980384717838606, "grad_norm": 3.659507989883423, "learning_rate": 3.941746161240035e-08, "loss": 1.9782144546508789, "step": 123640 }, { "epoch": 0.9981191931096276, "grad_norm": 2.8578133583068848, "learning_rate": 3.780199187418722e-08, "loss": 2.4974239349365233, "step": 123650 }, { "epoch": 0.9981999144353947, "grad_norm": 4.139192581176758, "learning_rate": 3.618652213597409e-08, "loss": 3.0156904220581056, "step": 123660 }, { "epoch": 0.9982806357611618, "grad_norm": 2.6265406608581543, "learning_rate": 3.4571052397760964e-08, "loss": 2.2766254425048826, "step": 123670 }, { "epoch": 0.9983613570869287, "grad_norm": 4.062864303588867, "learning_rate": 3.2955582659547836e-08, "loss": 1.9163690567016602, "step": 123680 }, { "epoch": 0.9984420784126958, "grad_norm": 4.606505393981934, "learning_rate": 3.13401129213347e-08, "loss": 2.1692684173583983, "step": 123690 }, { "epoch": 0.9985227997384629, "grad_norm": 1.9067668914794922, "learning_rate": 2.9724643183121573e-08, "loss": 2.4022089004516602, "step": 123700 }, { "epoch": 0.99860352106423, "grad_norm": 2.9502041339874268, "learning_rate": 2.8109173444908446e-08, "loss": 2.298861503601074, "step": 123710 }, { "epoch": 0.998684242389997, "grad_norm": 3.4149973392486572, "learning_rate": 2.6493703706695314e-08, "loss": 2.024481201171875, "step": 123720 }, { "epoch": 0.9987649637157641, "grad_norm": 5.91442346572876, "learning_rate": 2.4878233968482187e-08, "loss": 1.9159265518188477, "step": 123730 }, { "epoch": 0.9988456850415312, "grad_norm": 3.169983148574829, "learning_rate": 2.326276423026906e-08, "loss": 2.17440185546875, "step": 123740 }, { "epoch": 0.9989264063672981, "grad_norm": 2.2118284702301025, "learning_rate": 2.1647294492055928e-08, "loss": 2.048899269104004, "step": 123750 }, { "epoch": 0.9990071276930652, "grad_norm": 1.6198692321777344, "learning_rate": 2.00318247538428e-08, "loss": 2.1173276901245117, "step": 123760 }, { "epoch": 0.9990878490188323, "grad_norm": 3.8648152351379395, "learning_rate": 1.8416355015629672e-08, "loss": 2.788486099243164, "step": 123770 }, { "epoch": 0.9991685703445994, "grad_norm": 4.421558856964111, "learning_rate": 1.6800885277416544e-08, "loss": 2.4945161819458006, "step": 123780 }, { "epoch": 0.9992492916703664, "grad_norm": 2.235769510269165, "learning_rate": 1.5185415539203413e-08, "loss": 2.5644710540771483, "step": 123790 }, { "epoch": 0.9993300129961334, "grad_norm": 4.417644500732422, "learning_rate": 1.3569945800990283e-08, "loss": 2.5949230194091797, "step": 123800 }, { "epoch": 0.9994107343219005, "grad_norm": 3.5787079334259033, "learning_rate": 1.1954476062777156e-08, "loss": 2.3148225784301757, "step": 123810 }, { "epoch": 0.9994914556476675, "grad_norm": 2.5776584148406982, "learning_rate": 1.0339006324564026e-08, "loss": 2.300671195983887, "step": 123820 }, { "epoch": 0.9995721769734346, "grad_norm": 3.340179204940796, "learning_rate": 8.723536586350897e-09, "loss": 1.831557273864746, "step": 123830 }, { "epoch": 0.9996528982992017, "grad_norm": 2.47892165184021, "learning_rate": 7.108066848137767e-09, "loss": 2.2325035095214845, "step": 123840 }, { "epoch": 0.9997336196249688, "grad_norm": 5.304373264312744, "learning_rate": 5.492597109924639e-09, "loss": 2.2326337814331056, "step": 123850 }, { "epoch": 0.9998143409507357, "grad_norm": 3.809691905975342, "learning_rate": 3.87712737171151e-09, "loss": 2.0449317932128905, "step": 123860 }, { "epoch": 0.9998950622765028, "grad_norm": 3.869086980819702, "learning_rate": 2.2616576334983807e-09, "loss": 2.457773971557617, "step": 123870 }, { "epoch": 0.9999757836022699, "grad_norm": 2.694817543029785, "learning_rate": 6.461878952852516e-10, "loss": 2.6917745590209963, "step": 123880 }, { "epoch": 1.0, "step": 123883, "total_flos": 1.2877683635977818e+19, "train_loss": 0.0015091713571641224, "train_runtime": 371.8219, "train_samples_per_second": 1332.713, "train_steps_per_second": 333.178 } ], "logging_steps": 10, "max_steps": 123883, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.2877683635977818e+19, "train_batch_size": 1, "trial_name": null, "trial_params": null }