{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.0,
  "eval_steps": 500,
  "global_step": 2094,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.009551098376313277,
      "grad_norm": 2.2569775581359863,
      "learning_rate": 1.9914040114613182e-05,
      "loss": 1.0939,
      "step": 10
    },
    {
      "epoch": 0.019102196752626553,
      "grad_norm": 2.2780814170837402,
      "learning_rate": 1.981852913085005e-05,
      "loss": 1.0778,
      "step": 20
    },
    {
      "epoch": 0.02865329512893983,
      "grad_norm": 6.2301554679870605,
      "learning_rate": 1.9723018147086915e-05,
      "loss": 0.9625,
      "step": 30
    },
    {
      "epoch": 0.038204393505253106,
      "grad_norm": 24.95392417907715,
      "learning_rate": 1.9627507163323785e-05,
      "loss": 0.8937,
      "step": 40
    },
    {
      "epoch": 0.04775549188156638,
      "grad_norm": 3.5438709259033203,
      "learning_rate": 1.9531996179560652e-05,
      "loss": 0.7843,
      "step": 50
    },
    {
      "epoch": 0.05730659025787966,
      "grad_norm": 3.102586269378662,
      "learning_rate": 1.943648519579752e-05,
      "loss": 0.6911,
      "step": 60
    },
    {
      "epoch": 0.06685768863419293,
      "grad_norm": 1.7375000715255737,
      "learning_rate": 1.9340974212034385e-05,
      "loss": 0.5772,
      "step": 70
    },
    {
      "epoch": 0.07640878701050621,
      "grad_norm": 4.719320774078369,
      "learning_rate": 1.9245463228271252e-05,
      "loss": 0.5894,
      "step": 80
    },
    {
      "epoch": 0.08595988538681948,
      "grad_norm": 1.8283956050872803,
      "learning_rate": 1.9149952244508122e-05,
      "loss": 0.405,
      "step": 90
    },
    {
      "epoch": 0.09551098376313276,
      "grad_norm": 27.354843139648438,
      "learning_rate": 1.905444126074499e-05,
      "loss": 0.4705,
      "step": 100
    },
    {
      "epoch": 0.10506208213944604,
      "grad_norm": 4.766273021697998,
      "learning_rate": 1.8958930276981855e-05,
      "loss": 0.3644,
      "step": 110
    },
    {
      "epoch": 0.11461318051575932,
      "grad_norm": 4.26953125,
      "learning_rate": 1.8863419293218722e-05,
      "loss": 0.2284,
      "step": 120
    },
    {
      "epoch": 0.12416427889207259,
      "grad_norm": 0.7594988942146301,
      "learning_rate": 1.876790830945559e-05,
      "loss": 0.2435,
      "step": 130
    },
    {
      "epoch": 0.13371537726838587,
      "grad_norm": 1.365365743637085,
      "learning_rate": 1.8672397325692455e-05,
      "loss": 0.2184,
      "step": 140
    },
    {
      "epoch": 0.14326647564469913,
      "grad_norm": 9.257822036743164,
      "learning_rate": 1.857688634192932e-05,
      "loss": 0.2248,
      "step": 150
    },
    {
      "epoch": 0.15281757402101243,
      "grad_norm": 0.5103208422660828,
      "learning_rate": 1.848137535816619e-05,
      "loss": 0.243,
      "step": 160
    },
    {
      "epoch": 0.1623686723973257,
      "grad_norm": Infinity,
      "learning_rate": 1.8385864374403058e-05,
      "loss": 0.2442,
      "step": 170
    },
    {
      "epoch": 0.17191977077363896,
      "grad_norm": 0.6302635669708252,
      "learning_rate": 1.8290353390639925e-05,
      "loss": 0.4167,
      "step": 180
    },
    {
      "epoch": 0.18147086914995225,
      "grad_norm": 4.381824493408203,
      "learning_rate": 1.819484240687679e-05,
      "loss": 0.2861,
      "step": 190
    },
    {
      "epoch": 0.19102196752626552,
      "grad_norm": 0.35794690251350403,
      "learning_rate": 1.8099331423113658e-05,
      "loss": 0.1059,
      "step": 200
    },
    {
      "epoch": 0.20057306590257878,
      "grad_norm": 0.4472959637641907,
      "learning_rate": 1.8003820439350528e-05,
      "loss": 0.0896,
      "step": 210
    },
    {
      "epoch": 0.21012416427889208,
      "grad_norm": 0.2821422517299652,
      "learning_rate": 1.7908309455587395e-05,
      "loss": 0.0837,
      "step": 220
    },
    {
      "epoch": 0.21967526265520534,
      "grad_norm": 26.638248443603516,
      "learning_rate": 1.781279847182426e-05,
      "loss": 0.1115,
      "step": 230
    },
    {
      "epoch": 0.22922636103151864,
      "grad_norm": 1.5657349824905396,
      "learning_rate": 1.7717287488061128e-05,
      "loss": 0.1829,
      "step": 240
    },
    {
      "epoch": 0.2387774594078319,
      "grad_norm": 0.2336825728416443,
      "learning_rate": 1.7621776504297995e-05,
      "loss": 0.0556,
      "step": 250
    },
    {
      "epoch": 0.24832855778414517,
      "grad_norm": 4.114994525909424,
      "learning_rate": 1.752626552053486e-05,
      "loss": 0.1532,
      "step": 260
    },
    {
      "epoch": 0.25787965616045844,
      "grad_norm": 13.862896919250488,
      "learning_rate": 1.743075453677173e-05,
      "loss": 0.2155,
      "step": 270
    },
    {
      "epoch": 0.26743075453677173,
      "grad_norm": 0.21343673765659332,
      "learning_rate": 1.7335243553008598e-05,
      "loss": 0.0374,
      "step": 280
    },
    {
      "epoch": 0.276981852913085,
      "grad_norm": 0.1753835529088974,
      "learning_rate": 1.7239732569245464e-05,
      "loss": 0.108,
      "step": 290
    },
    {
      "epoch": 0.28653295128939826,
      "grad_norm": 0.24574324488639832,
      "learning_rate": 1.714422158548233e-05,
      "loss": 0.1175,
      "step": 300
    },
    {
      "epoch": 0.29608404966571156,
      "grad_norm": 23.268014907836914,
      "learning_rate": 1.7048710601719198e-05,
      "loss": 0.1532,
      "step": 310
    },
    {
      "epoch": 0.30563514804202485,
      "grad_norm": 0.1591091752052307,
      "learning_rate": 1.6953199617956068e-05,
      "loss": 0.0994,
      "step": 320
    },
    {
      "epoch": 0.3151862464183381,
      "grad_norm": 0.17243853211402893,
      "learning_rate": 1.6857688634192934e-05,
      "loss": 0.083,
      "step": 330
    },
    {
      "epoch": 0.3247373447946514,
      "grad_norm": 2.6030852794647217,
      "learning_rate": 1.67621776504298e-05,
      "loss": 0.2142,
      "step": 340
    },
    {
      "epoch": 0.3342884431709647,
      "grad_norm": 0.15623551607131958,
      "learning_rate": 1.6666666666666667e-05,
      "loss": 0.0821,
      "step": 350
    },
    {
      "epoch": 0.3438395415472779,
      "grad_norm": 1.7919316291809082,
      "learning_rate": 1.6571155682903534e-05,
      "loss": 0.1883,
      "step": 360
    },
    {
      "epoch": 0.3533906399235912,
      "grad_norm": 0.2467084378004074,
      "learning_rate": 1.6475644699140404e-05,
      "loss": 0.083,
      "step": 370
    },
    {
      "epoch": 0.3629417382999045,
      "grad_norm": 1.9838752746582031,
      "learning_rate": 1.6380133715377267e-05,
      "loss": 0.0919,
      "step": 380
    },
    {
      "epoch": 0.37249283667621774,
      "grad_norm": 0.13745100796222687,
      "learning_rate": 1.6284622731614137e-05,
      "loss": 0.2578,
      "step": 390
    },
    {
      "epoch": 0.38204393505253104,
      "grad_norm": 23.14975929260254,
      "learning_rate": 1.6189111747851004e-05,
      "loss": 0.0609,
      "step": 400
    },
    {
      "epoch": 0.39159503342884433,
      "grad_norm": 0.16147832572460175,
      "learning_rate": 1.609360076408787e-05,
      "loss": 0.1865,
      "step": 410
    },
    {
      "epoch": 0.40114613180515757,
      "grad_norm": 8.0061616897583,
      "learning_rate": 1.599808978032474e-05,
      "loss": 0.1216,
      "step": 420
    },
    {
      "epoch": 0.41069723018147086,
      "grad_norm": 1.0904359817504883,
      "learning_rate": 1.5902578796561604e-05,
      "loss": 0.137,
      "step": 430
    },
    {
      "epoch": 0.42024832855778416,
      "grad_norm": 2.3162689208984375,
      "learning_rate": 1.5807067812798474e-05,
      "loss": 0.1358,
      "step": 440
    },
    {
      "epoch": 0.4297994269340974,
      "grad_norm": 0.11886035650968552,
      "learning_rate": 1.571155682903534e-05,
      "loss": 0.0733,
      "step": 450
    },
    {
      "epoch": 0.4393505253104107,
      "grad_norm": 7.155008792877197,
      "learning_rate": 1.5616045845272207e-05,
      "loss": 0.1238,
      "step": 460
    },
    {
      "epoch": 0.448901623686724,
      "grad_norm": 0.11069323867559433,
      "learning_rate": 1.5520534861509077e-05,
      "loss": 0.1987,
      "step": 470
    },
    {
      "epoch": 0.4584527220630373,
      "grad_norm": 0.10282690078020096,
      "learning_rate": 1.542502387774594e-05,
      "loss": 0.091,
      "step": 480
    },
    {
      "epoch": 0.4680038204393505,
      "grad_norm": 0.1403094232082367,
      "learning_rate": 1.532951289398281e-05,
      "loss": 0.1213,
      "step": 490
    },
    {
      "epoch": 0.4775549188156638,
      "grad_norm": 0.1331929713487625,
      "learning_rate": 1.5234001910219675e-05,
      "loss": 0.0154,
      "step": 500
    },
    {
      "epoch": 0.4871060171919771,
      "grad_norm": 0.11407709866762161,
      "learning_rate": 1.5138490926456543e-05,
      "loss": 0.1221,
      "step": 510
    },
    {
      "epoch": 0.49665711556829034,
      "grad_norm": 0.5473654270172119,
      "learning_rate": 1.5042979942693412e-05,
      "loss": 0.1143,
      "step": 520
    },
    {
      "epoch": 0.5062082139446036,
      "grad_norm": 0.11086848378181458,
      "learning_rate": 1.4947468958930278e-05,
      "loss": 0.0141,
      "step": 530
    },
    {
      "epoch": 0.5157593123209169,
      "grad_norm": 78.81968688964844,
      "learning_rate": 1.4851957975167147e-05,
      "loss": 0.1552,
      "step": 540
    },
    {
      "epoch": 0.5253104106972302,
      "grad_norm": 6.011876106262207,
      "learning_rate": 1.4756446991404012e-05,
      "loss": 0.1232,
      "step": 550
    },
    {
      "epoch": 0.5348615090735435,
      "grad_norm": 0.15130910277366638,
      "learning_rate": 1.466093600764088e-05,
      "loss": 0.1164,
      "step": 560
    },
    {
      "epoch": 0.5444126074498568,
      "grad_norm": 0.0998225063085556,
      "learning_rate": 1.4565425023877747e-05,
      "loss": 0.1248,
      "step": 570
    },
    {
      "epoch": 0.55396370582617,
      "grad_norm": 0.09413418173789978,
      "learning_rate": 1.4469914040114615e-05,
      "loss": 0.1258,
      "step": 580
    },
    {
      "epoch": 0.5635148042024832,
      "grad_norm": 30.505067825317383,
      "learning_rate": 1.4374403056351483e-05,
      "loss": 0.0788,
      "step": 590
    },
    {
      "epoch": 0.5730659025787965,
      "grad_norm": 0.10750491917133331,
      "learning_rate": 1.4278892072588348e-05,
      "loss": 0.1026,
      "step": 600
    },
    {
      "epoch": 0.5826170009551098,
      "grad_norm": 0.08975467830896378,
      "learning_rate": 1.4183381088825216e-05,
      "loss": 0.2408,
      "step": 610
    },
    {
      "epoch": 0.5921680993314231,
      "grad_norm": 0.12342803925275803,
      "learning_rate": 1.4087870105062083e-05,
      "loss": 0.0717,
      "step": 620
    },
    {
      "epoch": 0.6017191977077364,
      "grad_norm": 0.07816806435585022,
      "learning_rate": 1.3992359121298951e-05,
      "loss": 0.1131,
      "step": 630
    },
    {
      "epoch": 0.6112702960840497,
      "grad_norm": 0.06454802304506302,
      "learning_rate": 1.389684813753582e-05,
      "loss": 0.0096,
      "step": 640
    },
    {
      "epoch": 0.620821394460363,
      "grad_norm": 0.08184290677309036,
      "learning_rate": 1.3801337153772685e-05,
      "loss": 0.1253,
      "step": 650
    },
    {
      "epoch": 0.6303724928366762,
      "grad_norm": 0.07238755375146866,
      "learning_rate": 1.3705826170009553e-05,
      "loss": 0.0805,
      "step": 660
    },
    {
      "epoch": 0.6399235912129895,
      "grad_norm": 1.8935270309448242,
      "learning_rate": 1.361031518624642e-05,
      "loss": 0.1435,
      "step": 670
    },
    {
      "epoch": 0.6494746895893028,
      "grad_norm": 17.36036491394043,
      "learning_rate": 1.3514804202483288e-05,
      "loss": 0.0125,
      "step": 680
    },
    {
      "epoch": 0.6590257879656161,
      "grad_norm": 0.07514392584562302,
      "learning_rate": 1.3419293218720153e-05,
      "loss": 0.0106,
      "step": 690
    },
    {
      "epoch": 0.6685768863419294,
      "grad_norm": 4.228085041046143,
      "learning_rate": 1.3323782234957021e-05,
      "loss": 0.2031,
      "step": 700
    },
    {
      "epoch": 0.6781279847182426,
      "grad_norm": 0.21097038686275482,
      "learning_rate": 1.322827125119389e-05,
      "loss": 0.1307,
      "step": 710
    },
    {
      "epoch": 0.6876790830945558,
      "grad_norm": 0.10752805322408676,
      "learning_rate": 1.3132760267430756e-05,
      "loss": 0.0989,
      "step": 720
    },
    {
      "epoch": 0.6972301814708691,
      "grad_norm": 51.11091995239258,
      "learning_rate": 1.3037249283667624e-05,
      "loss": 0.1572,
      "step": 730
    },
    {
      "epoch": 0.7067812798471824,
      "grad_norm": 0.08639833331108093,
      "learning_rate": 1.2941738299904489e-05,
      "loss": 0.0508,
      "step": 740
    },
    {
      "epoch": 0.7163323782234957,
      "grad_norm": 0.09963525086641312,
      "learning_rate": 1.2846227316141357e-05,
      "loss": 0.0763,
      "step": 750
    },
    {
      "epoch": 0.725883476599809,
      "grad_norm": 0.0683053508400917,
      "learning_rate": 1.2750716332378224e-05,
      "loss": 0.0567,
      "step": 760
    },
    {
      "epoch": 0.7354345749761223,
      "grad_norm": 40.62727737426758,
      "learning_rate": 1.2655205348615092e-05,
      "loss": 0.2471,
      "step": 770
    },
    {
      "epoch": 0.7449856733524355,
      "grad_norm": 0.07331220805644989,
      "learning_rate": 1.2559694364851959e-05,
      "loss": 0.1743,
      "step": 780
    },
    {
      "epoch": 0.7545367717287488,
      "grad_norm": 0.06410760432481766,
      "learning_rate": 1.2464183381088826e-05,
      "loss": 0.0624,
      "step": 790
    },
    {
      "epoch": 0.7640878701050621,
      "grad_norm": 0.11088142544031143,
      "learning_rate": 1.2368672397325694e-05,
      "loss": 0.0087,
      "step": 800
    },
    {
      "epoch": 0.7736389684813754,
      "grad_norm": 0.05074993893504143,
      "learning_rate": 1.227316141356256e-05,
      "loss": 0.1333,
      "step": 810
    },
    {
      "epoch": 0.7831900668576887,
      "grad_norm": 0.05052105337381363,
      "learning_rate": 1.2177650429799429e-05,
      "loss": 0.0829,
      "step": 820
    },
    {
      "epoch": 0.792741165234002,
      "grad_norm": 0.06240995600819588,
      "learning_rate": 1.2082139446036295e-05,
      "loss": 0.0074,
      "step": 830
    },
    {
      "epoch": 0.8022922636103151,
      "grad_norm": 0.06128745898604393,
      "learning_rate": 1.1986628462273162e-05,
      "loss": 0.0705,
      "step": 840
    },
    {
      "epoch": 0.8118433619866284,
      "grad_norm": 4.010462760925293,
      "learning_rate": 1.189111747851003e-05,
      "loss": 0.1351,
      "step": 850
    },
    {
      "epoch": 0.8213944603629417,
      "grad_norm": 0.07143828272819519,
      "learning_rate": 1.1795606494746897e-05,
      "loss": 0.0514,
      "step": 860
    },
    {
      "epoch": 0.830945558739255,
      "grad_norm": 0.06396259367465973,
      "learning_rate": 1.1700095510983764e-05,
      "loss": 0.0713,
      "step": 870
    },
    {
      "epoch": 0.8404966571155683,
      "grad_norm": 14.529672622680664,
      "learning_rate": 1.160458452722063e-05,
      "loss": 0.0513,
      "step": 880
    },
    {
      "epoch": 0.8500477554918816,
      "grad_norm": 1.4704737663269043,
      "learning_rate": 1.1509073543457498e-05,
      "loss": 0.353,
      "step": 890
    },
    {
      "epoch": 0.8595988538681948,
      "grad_norm": 0.06813743710517883,
      "learning_rate": 1.1413562559694367e-05,
      "loss": 0.144,
      "step": 900
    },
    {
      "epoch": 0.8691499522445081,
      "grad_norm": 0.7163823843002319,
      "learning_rate": 1.1318051575931233e-05,
      "loss": 0.0415,
      "step": 910
    },
    {
      "epoch": 0.8787010506208214,
      "grad_norm": 0.05734021216630936,
      "learning_rate": 1.12225405921681e-05,
      "loss": 0.1911,
      "step": 920
    },
    {
      "epoch": 0.8882521489971347,
      "grad_norm": 0.06162785366177559,
      "learning_rate": 1.1127029608404967e-05,
      "loss": 0.0711,
      "step": 930
    },
    {
      "epoch": 0.897803247373448,
      "grad_norm": 0.1327008605003357,
      "learning_rate": 1.1031518624641835e-05,
      "loss": 0.058,
      "step": 940
    },
    {
      "epoch": 0.9073543457497613,
      "grad_norm": 0.05325314775109291,
      "learning_rate": 1.0936007640878703e-05,
      "loss": 0.0376,
      "step": 950
    },
    {
      "epoch": 0.9169054441260746,
      "grad_norm": 0.7295445799827576,
      "learning_rate": 1.0840496657115568e-05,
      "loss": 0.083,
      "step": 960
    },
    {
      "epoch": 0.9264565425023877,
      "grad_norm": 0.0540502592921257,
      "learning_rate": 1.0744985673352436e-05,
      "loss": 0.1386,
      "step": 970
    },
    {
      "epoch": 0.936007640878701,
      "grad_norm": 0.1747369021177292,
      "learning_rate": 1.0649474689589303e-05,
      "loss": 0.0063,
      "step": 980
    },
    {
      "epoch": 0.9455587392550143,
      "grad_norm": 0.04095704108476639,
      "learning_rate": 1.0553963705826171e-05,
      "loss": 0.0961,
      "step": 990
    },
    {
      "epoch": 0.9551098376313276,
      "grad_norm": 1.786160945892334,
      "learning_rate": 1.0458452722063038e-05,
      "loss": 0.2077,
      "step": 1000
    },
    {
      "epoch": 0.9646609360076409,
      "grad_norm": 0.057904984802007675,
      "learning_rate": 1.0362941738299905e-05,
      "loss": 0.1009,
      "step": 1010
    },
    {
      "epoch": 0.9742120343839542,
      "grad_norm": 0.04530341923236847,
      "learning_rate": 1.0267430754536773e-05,
      "loss": 0.0077,
      "step": 1020
    },
    {
      "epoch": 0.9837631327602674,
      "grad_norm": 0.04884221404790878,
      "learning_rate": 1.017191977077364e-05,
      "loss": 0.0804,
      "step": 1030
    },
    {
      "epoch": 0.9933142311365807,
      "grad_norm": 5.464759349822998,
      "learning_rate": 1.0076408787010508e-05,
      "loss": 0.1108,
      "step": 1040
    },
    {
      "epoch": 1.0,
      "eval_loss": 0.10391418635845184,
      "eval_runtime": 1.2229,
      "eval_samples_per_second": 760.514,
      "eval_steps_per_second": 95.678,
      "step": 1047
    },
    {
      "epoch": 1.002865329512894,
      "grad_norm": 0.054792579263448715,
      "learning_rate": 9.980897803247374e-06,
      "loss": 0.0723,
      "step": 1050
    },
    {
      "epoch": 1.0124164278892072,
      "grad_norm": 2.5259604454040527,
      "learning_rate": 9.885386819484241e-06,
      "loss": 0.074,
      "step": 1060
    },
    {
      "epoch": 1.0219675262655206,
      "grad_norm": 0.06726188212633133,
      "learning_rate": 9.78987583572111e-06,
      "loss": 0.0709,
      "step": 1070
    },
    {
      "epoch": 1.0315186246418337,
      "grad_norm": 0.05034675449132919,
      "learning_rate": 9.694364851957976e-06,
      "loss": 0.0072,
      "step": 1080
    },
    {
      "epoch": 1.0410697230181472,
      "grad_norm": 1.9011842012405396,
      "learning_rate": 9.598853868194843e-06,
      "loss": 0.0803,
      "step": 1090
    },
    {
      "epoch": 1.0506208213944603,
      "grad_norm": 0.05382240563631058,
      "learning_rate": 9.50334288443171e-06,
      "loss": 0.0247,
      "step": 1100
    },
    {
      "epoch": 1.0601719197707737,
      "grad_norm": 0.22710120677947998,
      "learning_rate": 9.407831900668578e-06,
      "loss": 0.0061,
      "step": 1110
    },
    {
      "epoch": 1.069723018147087,
      "grad_norm": 0.042488373816013336,
      "learning_rate": 9.312320916905446e-06,
      "loss": 0.1183,
      "step": 1120
    },
    {
      "epoch": 1.0792741165234,
      "grad_norm": 2.047455072402954,
      "learning_rate": 9.216809933142312e-06,
      "loss": 0.0765,
      "step": 1130
    },
    {
      "epoch": 1.0888252148997135,
      "grad_norm": 61.58501052856445,
      "learning_rate": 9.121298949379179e-06,
      "loss": 0.2333,
      "step": 1140
    },
    {
      "epoch": 1.0983763132760267,
      "grad_norm": 0.06981759518384933,
      "learning_rate": 9.025787965616046e-06,
      "loss": 0.2329,
      "step": 1150
    },
    {
      "epoch": 1.10792741165234,
      "grad_norm": 0.09120076149702072,
      "learning_rate": 8.930276981852914e-06,
      "loss": 0.0239,
      "step": 1160
    },
    {
      "epoch": 1.1174785100286533,
      "grad_norm": 2.0684778690338135,
      "learning_rate": 8.834765998089782e-06,
      "loss": 0.0902,
      "step": 1170
    },
    {
      "epoch": 1.1270296084049667,
      "grad_norm": 0.08632172644138336,
      "learning_rate": 8.739255014326649e-06,
      "loss": 0.1149,
      "step": 1180
    },
    {
      "epoch": 1.1365807067812799,
      "grad_norm": 0.6690634489059448,
      "learning_rate": 8.643744030563516e-06,
      "loss": 0.0071,
      "step": 1190
    },
    {
      "epoch": 1.146131805157593,
      "grad_norm": 0.09388808161020279,
      "learning_rate": 8.548233046800382e-06,
      "loss": 0.0717,
      "step": 1200
    },
    {
      "epoch": 1.1556829035339065,
      "grad_norm": 0.06623850017786026,
      "learning_rate": 8.45272206303725e-06,
      "loss": 0.0866,
      "step": 1210
    },
    {
      "epoch": 1.1652340019102196,
      "grad_norm": 0.05635674670338631,
      "learning_rate": 8.357211079274117e-06,
      "loss": 0.0914,
      "step": 1220
    },
    {
      "epoch": 1.174785100286533,
      "grad_norm": 0.0588347390294075,
      "learning_rate": 8.261700095510985e-06,
      "loss": 0.0461,
      "step": 1230
    },
    {
      "epoch": 1.1843361986628462,
      "grad_norm": 0.03934504836797714,
      "learning_rate": 8.166189111747852e-06,
      "loss": 0.1019,
      "step": 1240
    },
    {
      "epoch": 1.1938872970391594,
      "grad_norm": 0.051371876150369644,
      "learning_rate": 8.070678127984719e-06,
      "loss": 0.016,
      "step": 1250
    },
    {
      "epoch": 1.2034383954154728,
      "grad_norm": 0.061191458255052567,
      "learning_rate": 7.975167144221587e-06,
      "loss": 0.0077,
      "step": 1260
    },
    {
      "epoch": 1.212989493791786,
      "grad_norm": 0.0495857410132885,
      "learning_rate": 7.879656160458454e-06,
      "loss": 0.0051,
      "step": 1270
    },
    {
      "epoch": 1.2225405921680994,
      "grad_norm": 0.04128009453415871,
      "learning_rate": 7.78414517669532e-06,
      "loss": 0.317,
      "step": 1280
    },
    {
      "epoch": 1.2320916905444126,
      "grad_norm": 0.03453819081187248,
      "learning_rate": 7.688634192932188e-06,
      "loss": 0.0608,
      "step": 1290
    },
    {
      "epoch": 1.2416427889207258,
      "grad_norm": 0.03668952360749245,
      "learning_rate": 7.593123209169055e-06,
      "loss": 0.0264,
      "step": 1300
    },
    {
      "epoch": 1.2511938872970392,
      "grad_norm": 0.03199330344796181,
      "learning_rate": 7.4976122254059225e-06,
      "loss": 0.0047,
      "step": 1310
    },
    {
      "epoch": 1.2607449856733524,
      "grad_norm": 1.9187037944793701,
      "learning_rate": 7.402101241642789e-06,
      "loss": 0.1462,
      "step": 1320
    },
    {
      "epoch": 1.2702960840496658,
      "grad_norm": 0.04411700740456581,
      "learning_rate": 7.306590257879657e-06,
      "loss": 0.0955,
      "step": 1330
    },
    {
      "epoch": 1.279847182425979,
      "grad_norm": 0.03471948206424713,
      "learning_rate": 7.211079274116523e-06,
      "loss": 0.0062,
      "step": 1340
    },
    {
      "epoch": 1.2893982808022924,
      "grad_norm": 0.042391568422317505,
      "learning_rate": 7.115568290353391e-06,
      "loss": 0.006,
      "step": 1350
    },
    {
      "epoch": 1.2989493791786055,
      "grad_norm": 0.04176805168390274,
      "learning_rate": 7.020057306590259e-06,
      "loss": 0.1142,
      "step": 1360
    },
    {
      "epoch": 1.3085004775549187,
      "grad_norm": 0.06825416535139084,
      "learning_rate": 6.924546322827126e-06,
      "loss": 0.1209,
      "step": 1370
    },
    {
      "epoch": 1.3180515759312321,
      "grad_norm": 0.04017266258597374,
      "learning_rate": 6.829035339063993e-06,
      "loss": 0.0045,
      "step": 1380
    },
    {
      "epoch": 1.3276026743075453,
      "grad_norm": 0.0732770562171936,
      "learning_rate": 6.73352435530086e-06,
      "loss": 0.1188,
      "step": 1390
    },
    {
      "epoch": 1.3371537726838587,
      "grad_norm": 0.04319130256772041,
      "learning_rate": 6.638013371537727e-06,
      "loss": 0.007,
      "step": 1400
    },
    {
      "epoch": 1.346704871060172,
      "grad_norm": 0.08936483412981033,
      "learning_rate": 6.542502387774594e-06,
      "loss": 0.0118,
      "step": 1410
    },
    {
      "epoch": 1.3562559694364853,
      "grad_norm": 0.035478316247463226,
      "learning_rate": 6.446991404011462e-06,
      "loss": 0.0048,
      "step": 1420
    },
    {
      "epoch": 1.3658070678127985,
      "grad_norm": 0.10484705865383148,
      "learning_rate": 6.3514804202483295e-06,
      "loss": 0.0056,
      "step": 1430
    },
    {
      "epoch": 1.3753581661891117,
      "grad_norm": 0.03144150972366333,
      "learning_rate": 6.255969436485196e-06,
      "loss": 0.0758,
      "step": 1440
    },
    {
      "epoch": 1.384909264565425,
      "grad_norm": 0.12434408813714981,
      "learning_rate": 6.160458452722064e-06,
      "loss": 0.0049,
      "step": 1450
    },
    {
      "epoch": 1.3944603629417383,
      "grad_norm": 3.348506212234497,
      "learning_rate": 6.06494746895893e-06,
      "loss": 0.0309,
      "step": 1460
    },
    {
      "epoch": 1.4040114613180517,
      "grad_norm": 0.03951037675142288,
      "learning_rate": 5.969436485195798e-06,
      "loss": 0.0538,
      "step": 1470
    },
    {
      "epoch": 1.4135625596943648,
      "grad_norm": 0.04269490763545036,
      "learning_rate": 5.873925501432666e-06,
      "loss": 0.0044,
      "step": 1480
    },
    {
      "epoch": 1.4231136580706782,
      "grad_norm": 0.760600745677948,
      "learning_rate": 5.778414517669533e-06,
      "loss": 0.0973,
      "step": 1490
    },
    {
      "epoch": 1.4326647564469914,
      "grad_norm": 0.03149113059043884,
      "learning_rate": 5.6829035339064e-06,
      "loss": 0.0346,
      "step": 1500
    },
    {
      "epoch": 1.4422158548233046,
      "grad_norm": 0.05680393800139427,
      "learning_rate": 5.587392550143267e-06,
      "loss": 0.0043,
      "step": 1510
    },
    {
      "epoch": 1.451766953199618,
      "grad_norm": 0.03370094299316406,
      "learning_rate": 5.491881566380134e-06,
      "loss": 0.1295,
      "step": 1520
    },
    {
      "epoch": 1.4613180515759312,
      "grad_norm": 0.045079704374074936,
      "learning_rate": 5.396370582617001e-06,
      "loss": 0.0736,
      "step": 1530
    },
    {
      "epoch": 1.4708691499522444,
      "grad_norm": 0.7708030343055725,
      "learning_rate": 5.300859598853869e-06,
      "loss": 0.0042,
      "step": 1540
    },
    {
      "epoch": 1.4804202483285578,
      "grad_norm": 0.04070596769452095,
      "learning_rate": 5.2053486150907365e-06,
      "loss": 0.0301,
      "step": 1550
    },
    {
      "epoch": 1.4899713467048712,
      "grad_norm": 0.033276643604040146,
      "learning_rate": 5.109837631327603e-06,
      "loss": 0.0696,
      "step": 1560
    },
    {
      "epoch": 1.4995224450811844,
      "grad_norm": 0.04143739864230156,
      "learning_rate": 5.014326647564471e-06,
      "loss": 0.0753,
      "step": 1570
    },
    {
      "epoch": 1.5090735434574976,
      "grad_norm": 0.032175932079553604,
      "learning_rate": 4.918815663801337e-06,
      "loss": 0.1394,
      "step": 1580
    },
    {
      "epoch": 1.518624641833811,
      "grad_norm": 0.045357052236795425,
      "learning_rate": 4.823304680038205e-06,
      "loss": 0.0066,
      "step": 1590
    },
    {
      "epoch": 1.5281757402101241,
      "grad_norm": 0.04212405905127525,
      "learning_rate": 4.727793696275072e-06,
      "loss": 0.0758,
      "step": 1600
    },
    {
      "epoch": 1.5377268385864373,
      "grad_norm": 0.06627684831619263,
      "learning_rate": 4.632282712511939e-06,
      "loss": 0.0945,
      "step": 1610
    },
    {
      "epoch": 1.5472779369627507,
      "grad_norm": 0.04194959998130798,
      "learning_rate": 4.536771728748807e-06,
      "loss": 0.0046,
      "step": 1620
    },
    {
      "epoch": 1.5568290353390641,
      "grad_norm": 0.04786338284611702,
      "learning_rate": 4.441260744985674e-06,
      "loss": 0.0045,
      "step": 1630
    },
    {
      "epoch": 1.5663801337153773,
      "grad_norm": 0.030701184645295143,
      "learning_rate": 4.345749761222541e-06,
      "loss": 0.0699,
      "step": 1640
    },
    {
      "epoch": 1.5759312320916905,
      "grad_norm": 0.043966639786958694,
      "learning_rate": 4.250238777459409e-06,
      "loss": 0.122,
      "step": 1650
    },
    {
      "epoch": 1.585482330468004,
      "grad_norm": 0.04325714334845543,
      "learning_rate": 4.154727793696275e-06,
      "loss": 0.0051,
      "step": 1660
    },
    {
      "epoch": 1.595033428844317,
      "grad_norm": 0.03839458152651787,
      "learning_rate": 4.059216809933143e-06,
      "loss": 0.0041,
      "step": 1670
    },
    {
      "epoch": 1.6045845272206303,
      "grad_norm": 0.02976052649319172,
      "learning_rate": 3.96370582617001e-06,
      "loss": 0.0563,
      "step": 1680
    },
    {
      "epoch": 1.6141356255969437,
      "grad_norm": 55.89206314086914,
      "learning_rate": 3.868194842406877e-06,
      "loss": 0.0258,
      "step": 1690
    },
    {
      "epoch": 1.623686723973257,
      "grad_norm": 0.057242073118686676,
      "learning_rate": 3.772683858643744e-06,
      "loss": 0.0042,
      "step": 1700
    },
    {
      "epoch": 1.63323782234957,
      "grad_norm": 0.0714183896780014,
      "learning_rate": 3.6771728748806117e-06,
      "loss": 0.005,
      "step": 1710
    },
    {
      "epoch": 1.6427889207258835,
      "grad_norm": 0.0358208492398262,
      "learning_rate": 3.5816618911174787e-06,
      "loss": 0.0739,
      "step": 1720
    },
    {
      "epoch": 1.6523400191021969,
      "grad_norm": 0.039776891469955444,
      "learning_rate": 3.4861509073543457e-06,
      "loss": 0.0685,
      "step": 1730
    },
    {
      "epoch": 1.66189111747851,
      "grad_norm": 0.03394331783056259,
      "learning_rate": 3.3906399235912136e-06,
      "loss": 0.0794,
      "step": 1740
    },
    {
      "epoch": 1.6714422158548232,
      "grad_norm": 0.031364619731903076,
      "learning_rate": 3.2951289398280806e-06,
      "loss": 0.118,
      "step": 1750
    },
    {
      "epoch": 1.6809933142311366,
      "grad_norm": 2.0534205436706543,
      "learning_rate": 3.1996179560649477e-06,
      "loss": 0.1878,
      "step": 1760
    },
    {
      "epoch": 1.6905444126074498,
      "grad_norm": 0.038952384144067764,
      "learning_rate": 3.104106972301815e-06,
      "loss": 0.0738,
      "step": 1770
    },
    {
      "epoch": 1.700095510983763,
      "grad_norm": 0.029340583831071854,
      "learning_rate": 3.008595988538682e-06,
      "loss": 0.005,
      "step": 1780
    },
    {
      "epoch": 1.7096466093600764,
      "grad_norm": 0.04813091456890106,
      "learning_rate": 2.9130850047755492e-06,
      "loss": 0.0675,
      "step": 1790
    },
    {
      "epoch": 1.7191977077363898,
      "grad_norm": 0.05901302769780159,
      "learning_rate": 2.8175740210124163e-06,
      "loss": 0.0479,
      "step": 1800
    },
    {
      "epoch": 1.728748806112703,
      "grad_norm": 0.044173464179039,
      "learning_rate": 2.722063037249284e-06,
      "loss": 0.0423,
      "step": 1810
    },
    {
      "epoch": 1.7382999044890162,
      "grad_norm": 0.03839905560016632,
      "learning_rate": 2.626552053486151e-06,
      "loss": 0.1425,
      "step": 1820
    },
    {
      "epoch": 1.7478510028653296,
      "grad_norm": 0.059842657297849655,
      "learning_rate": 2.5310410697230182e-06,
      "loss": 0.0042,
      "step": 1830
    },
    {
      "epoch": 1.7574021012416428,
      "grad_norm": 0.24521498382091522,
      "learning_rate": 2.4355300859598857e-06,
      "loss": 0.0525,
      "step": 1840
    },
    {
      "epoch": 1.766953199617956,
      "grad_norm": 0.5483675003051758,
      "learning_rate": 2.3400191021967527e-06,
      "loss": 0.0752,
      "step": 1850
    },
    {
      "epoch": 1.7765042979942693,
      "grad_norm": 0.036952149122953415,
      "learning_rate": 2.24450811843362e-06,
      "loss": 0.1074,
      "step": 1860
    },
    {
      "epoch": 1.7860553963705827,
      "grad_norm": 0.8135057091712952,
      "learning_rate": 2.1489971346704872e-06,
      "loss": 0.0048,
      "step": 1870
    },
    {
      "epoch": 1.795606494746896,
      "grad_norm": 0.06449055671691895,
      "learning_rate": 2.0534861509073547e-06,
      "loss": 0.1038,
      "step": 1880
    },
    {
      "epoch": 1.8051575931232091,
      "grad_norm": 107.41304779052734,
      "learning_rate": 1.9579751671442217e-06,
      "loss": 0.0318,
      "step": 1890
    },
    {
      "epoch": 1.8147086914995225,
      "grad_norm": 0.04807087033987045,
      "learning_rate": 1.862464183381089e-06,
      "loss": 0.1689,
      "step": 1900
    },
    {
      "epoch": 1.8242597898758357,
      "grad_norm": 0.04132077470421791,
      "learning_rate": 1.7669531996179562e-06,
      "loss": 0.0041,
      "step": 1910
    },
    {
      "epoch": 1.8338108882521489,
      "grad_norm": 0.06250818073749542,
      "learning_rate": 1.6714422158548235e-06,
      "loss": 0.0052,
      "step": 1920
    },
    {
      "epoch": 1.8433619866284623,
      "grad_norm": 83.07594299316406,
      "learning_rate": 1.5759312320916905e-06,
      "loss": 0.174,
      "step": 1930
    },
    {
      "epoch": 1.8529130850047757,
      "grad_norm": 0.04571348428726196,
      "learning_rate": 1.480420248328558e-06,
      "loss": 0.0049,
      "step": 1940
    },
    {
      "epoch": 1.8624641833810889,
      "grad_norm": 0.04580092057585716,
      "learning_rate": 1.3849092645654252e-06,
      "loss": 0.0638,
      "step": 1950
    },
    {
      "epoch": 1.872015281757402,
      "grad_norm": 0.04569645971059799,
      "learning_rate": 1.2893982808022922e-06,
      "loss": 0.0551,
      "step": 1960
    },
    {
      "epoch": 1.8815663801337155,
      "grad_norm": 0.049619242548942566,
      "learning_rate": 1.1938872970391597e-06,
      "loss": 0.0436,
      "step": 1970
    },
    {
      "epoch": 1.8911174785100286,
      "grad_norm": 0.059808436781167984,
      "learning_rate": 1.0983763132760267e-06,
      "loss": 0.0653,
      "step": 1980
    },
    {
      "epoch": 1.9006685768863418,
      "grad_norm": 0.038019582629203796,
      "learning_rate": 1.002865329512894e-06,
      "loss": 0.0632,
      "step": 1990
    },
    {
      "epoch": 1.9102196752626552,
      "grad_norm": 0.037652261555194855,
      "learning_rate": 9.073543457497613e-07,
      "loss": 0.0676,
      "step": 2000
    },
    {
      "epoch": 1.9197707736389686,
      "grad_norm": 0.0339687243103981,
      "learning_rate": 8.118433619866285e-07,
      "loss": 0.0048,
      "step": 2010
    },
    {
      "epoch": 1.9293218720152816,
      "grad_norm": 0.043730951845645905,
      "learning_rate": 7.163323782234957e-07,
      "loss": 0.005,
      "step": 2020
    },
    {
      "epoch": 1.938872970391595,
      "grad_norm": 0.044012073427438736,
      "learning_rate": 6.20821394460363e-07,
      "loss": 0.0738,
      "step": 2030
    },
    {
      "epoch": 1.9484240687679084,
      "grad_norm": 0.34402212500572205,
      "learning_rate": 5.253104106972302e-07,
      "loss": 0.0062,
      "step": 2040
    },
    {
      "epoch": 1.9579751671442216,
      "grad_norm": 0.06250176578760147,
      "learning_rate": 4.2979942693409743e-07,
      "loss": 0.0665,
      "step": 2050
    },
    {
      "epoch": 1.9675262655205348,
      "grad_norm": 0.06154881417751312,
      "learning_rate": 3.342884431709647e-07,
      "loss": 0.0266,
      "step": 2060
    },
    {
      "epoch": 1.9770773638968482,
      "grad_norm": 0.038224026560783386,
      "learning_rate": 2.3877745940783193e-07,
      "loss": 0.0039,
      "step": 2070
    },
    {
      "epoch": 1.9866284622731614,
      "grad_norm": 0.668195903301239,
      "learning_rate": 1.4326647564469915e-07,
      "loss": 0.0162,
      "step": 2080
    },
    {
      "epoch": 1.9961795606494745,
      "grad_norm": 0.048277534544467926,
      "learning_rate": 4.775549188156639e-08,
      "loss": 0.0049,
      "step": 2090
    },
    {
      "epoch": 2.0,
      "eval_loss": 0.10492703318595886,
      "eval_runtime": 1.2317,
      "eval_samples_per_second": 755.069,
      "eval_steps_per_second": 94.993,
      "step": 2094
    }
  ],
  "logging_steps": 10,
  "max_steps": 2094,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 4403992385931264.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}