{ "best_metric": 1.3688740730285645, "best_model_checkpoint": "saved_model/c2s_dec2024/checkpoint-11928", "epoch": 2.0, "eval_steps": 500, "global_step": 23857, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": null, "learning_rate": 0.0, "loss": 74.8478, "step": 1 }, { "epoch": 0.0, "grad_norm": 13.946953773498535, "learning_rate": 2.0000000000000003e-06, "loss": 73.9489, "step": 10 }, { "epoch": 0.0, "grad_norm": 17.25737762451172, "learning_rate": 7.000000000000001e-06, "loss": 73.6478, "step": 20 }, { "epoch": 0.0, "grad_norm": 13.36816692352295, "learning_rate": 1.2e-05, "loss": 72.7254, "step": 30 }, { "epoch": 0.0, "grad_norm": 12.802995681762695, "learning_rate": 1.65e-05, "loss": 72.1129, "step": 40 }, { "epoch": 0.0, "grad_norm": 13.857714653015137, "learning_rate": 2.15e-05, "loss": 67.5212, "step": 50 }, { "epoch": 0.01, "grad_norm": 17.70682716369629, "learning_rate": 2.6500000000000004e-05, "loss": 63.3518, "step": 60 }, { "epoch": 0.01, "grad_norm": 27.735233306884766, "learning_rate": 3.15e-05, "loss": 52.952, "step": 70 }, { "epoch": 0.01, "grad_norm": 25.16404151916504, "learning_rate": 3.65e-05, "loss": 31.7544, "step": 80 }, { "epoch": 0.01, "grad_norm": 12.33328914642334, "learning_rate": 4.15e-05, "loss": 13.379, "step": 90 }, { "epoch": 0.01, "grad_norm": 7.520598888397217, "learning_rate": 4.6500000000000005e-05, "loss": 7.3857, "step": 100 }, { "epoch": 0.01, "grad_norm": 6.755812168121338, "learning_rate": 5.1500000000000005e-05, "loss": 5.2951, "step": 110 }, { "epoch": 0.01, "grad_norm": 7.882263660430908, "learning_rate": 5.65e-05, "loss": 4.2907, "step": 120 }, { "epoch": 0.01, "grad_norm": 7.14441442489624, "learning_rate": 6.15e-05, "loss": 3.9018, "step": 130 }, { "epoch": 0.01, "grad_norm": 6.848437309265137, "learning_rate": 6.65e-05, "loss": 3.7094, "step": 140 }, { "epoch": 0.01, "grad_norm": 5.878305435180664, "learning_rate": 7.15e-05, 
"loss": 3.5207, "step": 150 }, { "epoch": 0.01, "grad_norm": 6.875974655151367, "learning_rate": 7.65e-05, "loss": 3.5124, "step": 160 }, { "epoch": 0.01, "grad_norm": 4.956191062927246, "learning_rate": 8.15e-05, "loss": 3.3862, "step": 170 }, { "epoch": 0.02, "grad_norm": 6.560382843017578, "learning_rate": 8.65e-05, "loss": 3.3764, "step": 180 }, { "epoch": 0.02, "grad_norm": 5.907286167144775, "learning_rate": 9.15e-05, "loss": 3.2589, "step": 190 }, { "epoch": 0.02, "grad_norm": 5.328891754150391, "learning_rate": 9.65e-05, "loss": 3.2308, "step": 200 }, { "epoch": 0.02, "grad_norm": 6.169933319091797, "learning_rate": 9.999748068525362e-05, "loss": 3.2297, "step": 210 }, { "epoch": 0.02, "grad_norm": 5.502355575561523, "learning_rate": 9.998908296943233e-05, "loss": 3.1368, "step": 220 }, { "epoch": 0.02, "grad_norm": 5.5027313232421875, "learning_rate": 9.998068525361102e-05, "loss": 3.0277, "step": 230 }, { "epoch": 0.02, "grad_norm": 5.679823875427246, "learning_rate": 9.997228753778972e-05, "loss": 2.9859, "step": 240 }, { "epoch": 0.02, "grad_norm": 6.180417537689209, "learning_rate": 9.996388982196843e-05, "loss": 2.9394, "step": 250 }, { "epoch": 0.02, "grad_norm": 5.605576992034912, "learning_rate": 9.995549210614713e-05, "loss": 2.9062, "step": 260 }, { "epoch": 0.02, "grad_norm": 5.151813983917236, "learning_rate": 9.994709439032583e-05, "loss": 2.9341, "step": 270 }, { "epoch": 0.02, "grad_norm": 5.148714065551758, "learning_rate": 9.993869667450453e-05, "loss": 2.9287, "step": 280 }, { "epoch": 0.02, "grad_norm": 4.156353950500488, "learning_rate": 9.993029895868325e-05, "loss": 2.8849, "step": 290 }, { "epoch": 0.03, "grad_norm": 4.806578159332275, "learning_rate": 9.992190124286195e-05, "loss": 2.9143, "step": 300 }, { "epoch": 0.03, "grad_norm": 5.209172248840332, "learning_rate": 9.991350352704065e-05, "loss": 2.819, "step": 310 }, { "epoch": 0.03, "grad_norm": 4.573767185211182, "learning_rate": 9.990510581121935e-05, "loss": 2.8762, "step": 
320 }, { "epoch": 0.03, "grad_norm": 4.73787784576416, "learning_rate": 9.989670809539807e-05, "loss": 2.8593, "step": 330 }, { "epoch": 0.03, "grad_norm": 3.8452227115631104, "learning_rate": 9.988831037957675e-05, "loss": 2.7755, "step": 340 }, { "epoch": 0.03, "grad_norm": 5.609687328338623, "learning_rate": 9.987991266375545e-05, "loss": 2.8353, "step": 350 }, { "epoch": 0.03, "grad_norm": 5.1777472496032715, "learning_rate": 9.987151494793417e-05, "loss": 2.7933, "step": 360 }, { "epoch": 0.03, "grad_norm": 4.308431148529053, "learning_rate": 9.986311723211287e-05, "loss": 2.7714, "step": 370 }, { "epoch": 0.03, "grad_norm": 4.116801738739014, "learning_rate": 9.985471951629157e-05, "loss": 2.8051, "step": 380 }, { "epoch": 0.03, "grad_norm": 4.194584369659424, "learning_rate": 9.984632180047028e-05, "loss": 2.7259, "step": 390 }, { "epoch": 0.03, "grad_norm": 4.333553314208984, "learning_rate": 9.983792408464898e-05, "loss": 2.7394, "step": 400 }, { "epoch": 0.03, "grad_norm": 4.2470808029174805, "learning_rate": 9.982952636882768e-05, "loss": 2.7138, "step": 410 }, { "epoch": 0.04, "grad_norm": 3.5597198009490967, "learning_rate": 9.982112865300639e-05, "loss": 2.6112, "step": 420 }, { "epoch": 0.04, "grad_norm": 3.565213918685913, "learning_rate": 9.98127309371851e-05, "loss": 2.5829, "step": 430 }, { "epoch": 0.04, "grad_norm": 3.250683069229126, "learning_rate": 9.98043332213638e-05, "loss": 2.5245, "step": 440 }, { "epoch": 0.04, "grad_norm": 4.06723690032959, "learning_rate": 9.979593550554249e-05, "loss": 2.4863, "step": 450 }, { "epoch": 0.04, "grad_norm": 4.058586120605469, "learning_rate": 9.97875377897212e-05, "loss": 2.4628, "step": 460 }, { "epoch": 0.04, "grad_norm": 3.2529423236846924, "learning_rate": 9.97791400738999e-05, "loss": 2.4223, "step": 470 }, { "epoch": 0.04, "grad_norm": 3.3110713958740234, "learning_rate": 9.97707423580786e-05, "loss": 2.2794, "step": 480 }, { "epoch": 0.04, "grad_norm": 2.3825504779815674, "learning_rate": 
9.976234464225732e-05, "loss": 2.2906, "step": 490 }, { "epoch": 0.04, "grad_norm": 2.7417995929718018, "learning_rate": 9.975394692643602e-05, "loss": 2.2686, "step": 500 }, { "epoch": 0.04, "grad_norm": 2.9438352584838867, "learning_rate": 9.974554921061472e-05, "loss": 2.1916, "step": 510 }, { "epoch": 0.04, "grad_norm": 2.3046646118164062, "learning_rate": 9.973715149479342e-05, "loss": 2.2106, "step": 520 }, { "epoch": 0.04, "grad_norm": 2.30639386177063, "learning_rate": 9.972875377897213e-05, "loss": 2.1224, "step": 530 }, { "epoch": 0.05, "grad_norm": 2.3796310424804688, "learning_rate": 9.972035606315083e-05, "loss": 2.0459, "step": 540 }, { "epoch": 0.05, "grad_norm": 2.4527125358581543, "learning_rate": 9.971195834732952e-05, "loss": 2.0884, "step": 550 }, { "epoch": 0.05, "grad_norm": 2.423755645751953, "learning_rate": 9.970356063150824e-05, "loss": 2.0769, "step": 560 }, { "epoch": 0.05, "grad_norm": 2.461524248123169, "learning_rate": 9.969516291568694e-05, "loss": 2.0332, "step": 570 }, { "epoch": 0.05, "grad_norm": 2.436368942260742, "learning_rate": 9.968676519986564e-05, "loss": 2.0041, "step": 580 }, { "epoch": 0.05, "grad_norm": 2.2243542671203613, "learning_rate": 9.967836748404434e-05, "loss": 1.9403, "step": 590 }, { "epoch": 0.05, "grad_norm": 1.9886754751205444, "learning_rate": 9.966996976822305e-05, "loss": 1.886, "step": 600 }, { "epoch": 0.05, "grad_norm": 2.1096863746643066, "learning_rate": 9.966157205240175e-05, "loss": 1.9461, "step": 610 }, { "epoch": 0.05, "grad_norm": 2.2656311988830566, "learning_rate": 9.965317433658045e-05, "loss": 2.0326, "step": 620 }, { "epoch": 0.05, "grad_norm": 2.069763660430908, "learning_rate": 9.964477662075917e-05, "loss": 1.9488, "step": 630 }, { "epoch": 0.05, "grad_norm": 2.215855836868286, "learning_rate": 9.963637890493787e-05, "loss": 1.9374, "step": 640 }, { "epoch": 0.05, "grad_norm": 2.1772992610931396, "learning_rate": 9.962798118911657e-05, "loss": 1.9783, "step": 650 }, { "epoch": 0.06, 
"grad_norm": 2.4245526790618896, "learning_rate": 9.961958347329527e-05, "loss": 1.8929, "step": 660 }, { "epoch": 0.06, "grad_norm": 1.761659026145935, "learning_rate": 9.961118575747397e-05, "loss": 1.9231, "step": 670 }, { "epoch": 0.06, "grad_norm": 1.8516594171524048, "learning_rate": 9.960278804165267e-05, "loss": 1.9017, "step": 680 }, { "epoch": 0.06, "grad_norm": 2.9810080528259277, "learning_rate": 9.959439032583137e-05, "loss": 1.9036, "step": 690 }, { "epoch": 0.06, "grad_norm": 1.9358642101287842, "learning_rate": 9.958599261001009e-05, "loss": 1.8785, "step": 700 }, { "epoch": 0.06, "grad_norm": 2.024515390396118, "learning_rate": 9.957759489418879e-05, "loss": 1.7852, "step": 710 }, { "epoch": 0.06, "grad_norm": 1.8687939643859863, "learning_rate": 9.956919717836749e-05, "loss": 1.8843, "step": 720 }, { "epoch": 0.06, "grad_norm": 1.6494457721710205, "learning_rate": 9.95607994625462e-05, "loss": 1.879, "step": 730 }, { "epoch": 0.06, "grad_norm": 1.7731373310089111, "learning_rate": 9.95524017467249e-05, "loss": 1.7703, "step": 740 }, { "epoch": 0.06, "grad_norm": 1.8192178010940552, "learning_rate": 9.95440040309036e-05, "loss": 1.8501, "step": 750 }, { "epoch": 0.06, "grad_norm": 1.732409119606018, "learning_rate": 9.95356063150823e-05, "loss": 1.8356, "step": 760 }, { "epoch": 0.06, "grad_norm": 1.898467779159546, "learning_rate": 9.9527208599261e-05, "loss": 1.8914, "step": 770 }, { "epoch": 0.07, "grad_norm": 1.9288657903671265, "learning_rate": 9.95188108834397e-05, "loss": 1.8111, "step": 780 }, { "epoch": 0.07, "grad_norm": 1.6328600645065308, "learning_rate": 9.95104131676184e-05, "loss": 1.782, "step": 790 }, { "epoch": 0.07, "grad_norm": 1.8500330448150635, "learning_rate": 9.950201545179712e-05, "loss": 1.7623, "step": 800 }, { "epoch": 0.07, "grad_norm": 1.769045352935791, "learning_rate": 9.949361773597582e-05, "loss": 1.7836, "step": 810 }, { "epoch": 0.07, "grad_norm": 2.2759504318237305, "learning_rate": 9.948522002015452e-05, 
"loss": 1.7352, "step": 820 }, { "epoch": 0.07, "grad_norm": 1.5127414464950562, "learning_rate": 9.947682230433322e-05, "loss": 1.7412, "step": 830 }, { "epoch": 0.07, "grad_norm": 1.9331601858139038, "learning_rate": 9.946842458851194e-05, "loss": 1.7243, "step": 840 }, { "epoch": 0.07, "grad_norm": 1.8960950374603271, "learning_rate": 9.946002687269064e-05, "loss": 1.6971, "step": 850 }, { "epoch": 0.07, "grad_norm": 1.9343823194503784, "learning_rate": 9.945162915686934e-05, "loss": 1.8488, "step": 860 }, { "epoch": 0.07, "grad_norm": 1.6667115688323975, "learning_rate": 9.944323144104804e-05, "loss": 1.7665, "step": 870 }, { "epoch": 0.07, "grad_norm": 1.4781936407089233, "learning_rate": 9.943483372522674e-05, "loss": 1.7995, "step": 880 }, { "epoch": 0.07, "grad_norm": 1.8188730478286743, "learning_rate": 9.942643600940544e-05, "loss": 1.7878, "step": 890 }, { "epoch": 0.08, "grad_norm": 2.3943722248077393, "learning_rate": 9.941803829358415e-05, "loss": 1.7612, "step": 900 }, { "epoch": 0.08, "grad_norm": 1.617660641670227, "learning_rate": 9.940964057776285e-05, "loss": 1.7199, "step": 910 }, { "epoch": 0.08, "grad_norm": 1.8252322673797607, "learning_rate": 9.940124286194156e-05, "loss": 1.8101, "step": 920 }, { "epoch": 0.08, "grad_norm": 1.598960041999817, "learning_rate": 9.939284514612026e-05, "loss": 1.7466, "step": 930 }, { "epoch": 0.08, "grad_norm": 1.8220932483673096, "learning_rate": 9.938444743029897e-05, "loss": 1.7581, "step": 940 }, { "epoch": 0.08, "grad_norm": 2.2253363132476807, "learning_rate": 9.937604971447767e-05, "loss": 1.6965, "step": 950 }, { "epoch": 0.08, "grad_norm": 2.05928111076355, "learning_rate": 9.936765199865637e-05, "loss": 1.6986, "step": 960 }, { "epoch": 0.08, "grad_norm": 1.8350791931152344, "learning_rate": 9.935925428283507e-05, "loss": 1.7013, "step": 970 }, { "epoch": 0.08, "grad_norm": 1.6412466764450073, "learning_rate": 9.935085656701377e-05, "loss": 1.7267, "step": 980 }, { "epoch": 0.08, "grad_norm": 
2.1339075565338135, "learning_rate": 9.934245885119247e-05, "loss": 1.7274, "step": 990 }, { "epoch": 0.08, "grad_norm": 1.5142629146575928, "learning_rate": 9.933406113537119e-05, "loss": 1.7106, "step": 1000 }, { "epoch": 0.08, "grad_norm": 1.6646658182144165, "learning_rate": 9.932566341954989e-05, "loss": 1.7493, "step": 1010 }, { "epoch": 0.09, "grad_norm": 1.9897111654281616, "learning_rate": 9.931726570372859e-05, "loss": 1.6104, "step": 1020 }, { "epoch": 0.09, "grad_norm": 1.6289434432983398, "learning_rate": 9.930886798790729e-05, "loss": 1.6626, "step": 1030 }, { "epoch": 0.09, "grad_norm": 1.7638473510742188, "learning_rate": 9.9300470272086e-05, "loss": 1.7381, "step": 1040 }, { "epoch": 0.09, "grad_norm": 1.5210635662078857, "learning_rate": 9.92920725562647e-05, "loss": 1.7418, "step": 1050 }, { "epoch": 0.09, "grad_norm": 1.6245310306549072, "learning_rate": 9.92836748404434e-05, "loss": 1.6449, "step": 1060 }, { "epoch": 0.09, "grad_norm": 1.5606416463851929, "learning_rate": 9.92752771246221e-05, "loss": 1.6615, "step": 1070 }, { "epoch": 0.09, "grad_norm": 1.652024745941162, "learning_rate": 9.92668794088008e-05, "loss": 1.643, "step": 1080 }, { "epoch": 0.09, "grad_norm": 1.6315152645111084, "learning_rate": 9.925848169297951e-05, "loss": 1.7083, "step": 1090 }, { "epoch": 0.09, "grad_norm": 1.5722026824951172, "learning_rate": 9.925008397715821e-05, "loss": 1.6538, "step": 1100 }, { "epoch": 0.09, "grad_norm": 1.8919326066970825, "learning_rate": 9.924168626133692e-05, "loss": 1.6246, "step": 1110 }, { "epoch": 0.09, "grad_norm": 2.0076210498809814, "learning_rate": 9.923328854551562e-05, "loss": 1.6728, "step": 1120 }, { "epoch": 0.09, "grad_norm": 1.8884912729263306, "learning_rate": 9.922489082969432e-05, "loss": 1.7246, "step": 1130 }, { "epoch": 0.1, "grad_norm": 1.6543947458267212, "learning_rate": 9.921649311387304e-05, "loss": 1.7703, "step": 1140 }, { "epoch": 0.1, "grad_norm": 1.4063857793807983, "learning_rate": 
9.920809539805174e-05, "loss": 1.7649, "step": 1150 }, { "epoch": 0.1, "grad_norm": 1.7405593395233154, "learning_rate": 9.919969768223044e-05, "loss": 1.6634, "step": 1160 }, { "epoch": 0.1, "grad_norm": 1.5649250745773315, "learning_rate": 9.919129996640914e-05, "loss": 1.622, "step": 1170 }, { "epoch": 0.1, "grad_norm": 1.510793924331665, "learning_rate": 9.918290225058785e-05, "loss": 1.6798, "step": 1180 }, { "epoch": 0.1, "grad_norm": 1.4818421602249146, "learning_rate": 9.917450453476654e-05, "loss": 1.7286, "step": 1190 }, { "epoch": 0.1, "grad_norm": 1.5227017402648926, "learning_rate": 9.916610681894524e-05, "loss": 1.6804, "step": 1200 }, { "epoch": 0.1, "grad_norm": 1.819848895072937, "learning_rate": 9.915770910312396e-05, "loss": 1.7091, "step": 1210 }, { "epoch": 0.1, "grad_norm": 1.6922246217727661, "learning_rate": 9.914931138730266e-05, "loss": 1.7101, "step": 1220 }, { "epoch": 0.1, "grad_norm": 1.8223869800567627, "learning_rate": 9.914091367148136e-05, "loss": 1.6555, "step": 1230 }, { "epoch": 0.1, "grad_norm": 1.982619047164917, "learning_rate": 9.913251595566007e-05, "loss": 1.5837, "step": 1240 }, { "epoch": 0.1, "grad_norm": 1.7177784442901611, "learning_rate": 9.912411823983877e-05, "loss": 1.6073, "step": 1250 }, { "epoch": 0.11, "grad_norm": 1.5832000970840454, "learning_rate": 9.911572052401747e-05, "loss": 1.5813, "step": 1260 }, { "epoch": 0.11, "grad_norm": 1.5784751176834106, "learning_rate": 9.910732280819617e-05, "loss": 1.6822, "step": 1270 }, { "epoch": 0.11, "grad_norm": 1.7744930982589722, "learning_rate": 9.909892509237489e-05, "loss": 1.6797, "step": 1280 }, { "epoch": 0.11, "grad_norm": 2.056403875350952, "learning_rate": 9.909052737655358e-05, "loss": 1.6241, "step": 1290 }, { "epoch": 0.11, "grad_norm": 1.5790748596191406, "learning_rate": 9.908212966073228e-05, "loss": 1.645, "step": 1300 }, { "epoch": 0.11, "grad_norm": 1.6673494577407837, "learning_rate": 9.907373194491099e-05, "loss": 1.625, "step": 1310 }, { 
"epoch": 0.11, "grad_norm": 1.517059326171875, "learning_rate": 9.906533422908969e-05, "loss": 1.5792, "step": 1320 }, { "epoch": 0.11, "grad_norm": 1.6006860733032227, "learning_rate": 9.905693651326839e-05, "loss": 1.6333, "step": 1330 }, { "epoch": 0.11, "grad_norm": 1.4926496744155884, "learning_rate": 9.904853879744709e-05, "loss": 1.5963, "step": 1340 }, { "epoch": 0.11, "grad_norm": 1.5649166107177734, "learning_rate": 9.90401410816258e-05, "loss": 1.6037, "step": 1350 }, { "epoch": 0.11, "grad_norm": 1.5477185249328613, "learning_rate": 9.903174336580451e-05, "loss": 1.5926, "step": 1360 }, { "epoch": 0.11, "grad_norm": 1.4812482595443726, "learning_rate": 9.902334564998321e-05, "loss": 1.6019, "step": 1370 }, { "epoch": 0.12, "grad_norm": 1.6920404434204102, "learning_rate": 9.901494793416192e-05, "loss": 1.6207, "step": 1380 }, { "epoch": 0.12, "grad_norm": 1.8390436172485352, "learning_rate": 9.900655021834062e-05, "loss": 1.6414, "step": 1390 }, { "epoch": 0.12, "grad_norm": 1.5190553665161133, "learning_rate": 9.899815250251931e-05, "loss": 1.6214, "step": 1400 }, { "epoch": 0.12, "grad_norm": 1.5912328958511353, "learning_rate": 9.898975478669802e-05, "loss": 1.5515, "step": 1410 }, { "epoch": 0.12, "grad_norm": 1.6428656578063965, "learning_rate": 9.898135707087673e-05, "loss": 1.5867, "step": 1420 }, { "epoch": 0.12, "grad_norm": 1.7359733581542969, "learning_rate": 9.897295935505543e-05, "loss": 1.6235, "step": 1430 }, { "epoch": 0.12, "grad_norm": 1.4765956401824951, "learning_rate": 9.896456163923413e-05, "loss": 1.6047, "step": 1440 }, { "epoch": 0.12, "grad_norm": 1.5506502389907837, "learning_rate": 9.895616392341284e-05, "loss": 1.6649, "step": 1450 }, { "epoch": 0.12, "grad_norm": 1.713420033454895, "learning_rate": 9.894776620759154e-05, "loss": 1.5648, "step": 1460 }, { "epoch": 0.12, "grad_norm": 1.3702709674835205, "learning_rate": 9.893936849177024e-05, "loss": 1.6239, "step": 1470 }, { "epoch": 0.12, "grad_norm": 1.5771777629852295, 
"learning_rate": 9.893097077594896e-05, "loss": 1.5701, "step": 1480 }, { "epoch": 0.12, "grad_norm": 1.6002380847930908, "learning_rate": 9.892257306012766e-05, "loss": 1.5596, "step": 1490 }, { "epoch": 0.13, "grad_norm": 1.6560975313186646, "learning_rate": 9.891417534430634e-05, "loss": 1.621, "step": 1500 }, { "epoch": 0.13, "grad_norm": 1.880449891090393, "learning_rate": 9.890577762848506e-05, "loss": 1.5582, "step": 1510 }, { "epoch": 0.13, "grad_norm": 1.608383297920227, "learning_rate": 9.889737991266376e-05, "loss": 1.591, "step": 1520 }, { "epoch": 0.13, "grad_norm": 1.4543036222457886, "learning_rate": 9.888898219684246e-05, "loss": 1.6353, "step": 1530 }, { "epoch": 0.13, "grad_norm": 1.5987427234649658, "learning_rate": 9.888058448102116e-05, "loss": 1.5747, "step": 1540 }, { "epoch": 0.13, "grad_norm": 1.4784051179885864, "learning_rate": 9.887218676519987e-05, "loss": 1.589, "step": 1550 }, { "epoch": 0.13, "grad_norm": 1.4499478340148926, "learning_rate": 9.886378904937858e-05, "loss": 1.5605, "step": 1560 }, { "epoch": 0.13, "grad_norm": 2.133988380432129, "learning_rate": 9.885539133355728e-05, "loss": 1.5543, "step": 1570 }, { "epoch": 0.13, "grad_norm": 1.6200871467590332, "learning_rate": 9.884699361773598e-05, "loss": 1.5787, "step": 1580 }, { "epoch": 0.13, "grad_norm": 1.5930988788604736, "learning_rate": 9.883859590191469e-05, "loss": 1.6567, "step": 1590 }, { "epoch": 0.13, "grad_norm": 1.498531699180603, "learning_rate": 9.883019818609339e-05, "loss": 1.5763, "step": 1600 }, { "epoch": 0.13, "grad_norm": 1.6275144815444946, "learning_rate": 9.882180047027209e-05, "loss": 1.575, "step": 1610 }, { "epoch": 0.14, "grad_norm": 1.5897412300109863, "learning_rate": 9.881340275445079e-05, "loss": 1.5799, "step": 1620 }, { "epoch": 0.14, "grad_norm": 1.5260472297668457, "learning_rate": 9.88050050386295e-05, "loss": 1.5347, "step": 1630 }, { "epoch": 0.14, "grad_norm": 1.5881023406982422, "learning_rate": 9.87966073228082e-05, "loss": 1.53, 
"step": 1640 }, { "epoch": 0.14, "grad_norm": 1.548144817352295, "learning_rate": 9.878820960698691e-05, "loss": 1.5536, "step": 1650 }, { "epoch": 0.14, "grad_norm": 1.6072583198547363, "learning_rate": 9.877981189116561e-05, "loss": 1.5693, "step": 1660 }, { "epoch": 0.14, "grad_norm": 1.8414642810821533, "learning_rate": 9.877141417534431e-05, "loss": 1.5985, "step": 1670 }, { "epoch": 0.14, "grad_norm": 1.6828004121780396, "learning_rate": 9.876301645952301e-05, "loss": 1.5622, "step": 1680 }, { "epoch": 0.14, "grad_norm": 1.4494273662567139, "learning_rate": 9.875461874370172e-05, "loss": 1.5535, "step": 1690 }, { "epoch": 0.14, "grad_norm": 1.6359646320343018, "learning_rate": 9.874622102788043e-05, "loss": 1.5739, "step": 1700 }, { "epoch": 0.14, "grad_norm": 1.6679375171661377, "learning_rate": 9.873782331205913e-05, "loss": 1.5461, "step": 1710 }, { "epoch": 0.14, "grad_norm": 1.581217646598816, "learning_rate": 9.872942559623783e-05, "loss": 1.5059, "step": 1720 }, { "epoch": 0.15, "grad_norm": 1.5814200639724731, "learning_rate": 9.872102788041653e-05, "loss": 1.5596, "step": 1730 }, { "epoch": 0.15, "grad_norm": 1.4673880338668823, "learning_rate": 9.871263016459523e-05, "loss": 1.517, "step": 1740 }, { "epoch": 0.15, "grad_norm": 1.8205835819244385, "learning_rate": 9.870423244877394e-05, "loss": 1.5124, "step": 1750 }, { "epoch": 0.15, "grad_norm": 1.6103378534317017, "learning_rate": 9.869583473295264e-05, "loss": 1.5258, "step": 1760 }, { "epoch": 0.15, "grad_norm": 1.6353694200515747, "learning_rate": 9.868743701713134e-05, "loss": 1.4667, "step": 1770 }, { "epoch": 0.15, "grad_norm": 1.530811071395874, "learning_rate": 9.867903930131004e-05, "loss": 1.4667, "step": 1780 }, { "epoch": 0.15, "grad_norm": 1.6762795448303223, "learning_rate": 9.867064158548876e-05, "loss": 1.5848, "step": 1790 }, { "epoch": 0.15, "grad_norm": 1.54753839969635, "learning_rate": 9.866224386966746e-05, "loss": 1.5819, "step": 1800 }, { "epoch": 0.15, "grad_norm": 
1.3980042934417725, "learning_rate": 9.865384615384616e-05, "loss": 1.5145, "step": 1810 }, { "epoch": 0.15, "grad_norm": 1.5987871885299683, "learning_rate": 9.864544843802486e-05, "loss": 1.5615, "step": 1820 }, { "epoch": 0.15, "grad_norm": 1.598125696182251, "learning_rate": 9.863705072220356e-05, "loss": 1.493, "step": 1830 }, { "epoch": 0.15, "grad_norm": 1.4321807622909546, "learning_rate": 9.862865300638226e-05, "loss": 1.4781, "step": 1840 }, { "epoch": 0.16, "grad_norm": 1.6206380128860474, "learning_rate": 9.862025529056098e-05, "loss": 1.5203, "step": 1850 }, { "epoch": 0.16, "grad_norm": 1.4826364517211914, "learning_rate": 9.861185757473968e-05, "loss": 1.5283, "step": 1860 }, { "epoch": 0.16, "grad_norm": 1.5955564975738525, "learning_rate": 9.860345985891838e-05, "loss": 1.5282, "step": 1870 }, { "epoch": 0.16, "grad_norm": 1.689847469329834, "learning_rate": 9.859506214309708e-05, "loss": 1.491, "step": 1880 }, { "epoch": 0.16, "grad_norm": 1.6846811771392822, "learning_rate": 9.858666442727579e-05, "loss": 1.5077, "step": 1890 }, { "epoch": 0.16, "grad_norm": 1.6529998779296875, "learning_rate": 9.85782667114545e-05, "loss": 1.5171, "step": 1900 }, { "epoch": 0.16, "grad_norm": 1.607906699180603, "learning_rate": 9.85698689956332e-05, "loss": 1.4852, "step": 1910 }, { "epoch": 0.16, "grad_norm": 1.4496583938598633, "learning_rate": 9.85614712798119e-05, "loss": 1.5379, "step": 1920 }, { "epoch": 0.16, "grad_norm": 1.5358500480651855, "learning_rate": 9.85530735639906e-05, "loss": 1.4627, "step": 1930 }, { "epoch": 0.16, "grad_norm": 1.600895643234253, "learning_rate": 9.85446758481693e-05, "loss": 1.4699, "step": 1940 }, { "epoch": 0.16, "grad_norm": 1.5847537517547607, "learning_rate": 9.8536278132348e-05, "loss": 1.4595, "step": 1950 }, { "epoch": 0.16, "grad_norm": 1.5020571947097778, "learning_rate": 9.852788041652671e-05, "loss": 1.4592, "step": 1960 }, { "epoch": 0.17, "grad_norm": 1.583520531654358, "learning_rate": 9.851948270070541e-05, 
"loss": 1.4813, "step": 1970 }, { "epoch": 0.17, "grad_norm": 1.5839787721633911, "learning_rate": 9.851108498488411e-05, "loss": 1.4936, "step": 1980 }, { "epoch": 0.17, "grad_norm": 1.5823278427124023, "learning_rate": 9.850268726906283e-05, "loss": 1.5422, "step": 1990 }, { "epoch": 0.17, "grad_norm": 1.7043323516845703, "learning_rate": 9.849428955324153e-05, "loss": 1.4834, "step": 2000 }, { "epoch": 0.17, "grad_norm": 1.630943775177002, "learning_rate": 9.848589183742023e-05, "loss": 1.4653, "step": 2010 }, { "epoch": 0.17, "grad_norm": 1.5110965967178345, "learning_rate": 9.847749412159893e-05, "loss": 1.46, "step": 2020 }, { "epoch": 0.17, "grad_norm": 1.657193660736084, "learning_rate": 9.846909640577763e-05, "loss": 1.4669, "step": 2030 }, { "epoch": 0.17, "grad_norm": 1.6347354650497437, "learning_rate": 9.846069868995633e-05, "loss": 1.502, "step": 2040 }, { "epoch": 0.17, "grad_norm": 1.6823369264602661, "learning_rate": 9.845230097413503e-05, "loss": 1.4591, "step": 2050 }, { "epoch": 0.17, "grad_norm": 1.7435994148254395, "learning_rate": 9.844390325831375e-05, "loss": 1.4785, "step": 2060 }, { "epoch": 0.17, "grad_norm": 1.4445375204086304, "learning_rate": 9.843550554249245e-05, "loss": 1.4508, "step": 2070 }, { "epoch": 0.17, "grad_norm": 1.5260076522827148, "learning_rate": 9.842710782667115e-05, "loss": 1.4983, "step": 2080 }, { "epoch": 0.18, "grad_norm": 1.6740764379501343, "learning_rate": 9.841871011084986e-05, "loss": 1.4386, "step": 2090 }, { "epoch": 0.18, "grad_norm": 1.517861008644104, "learning_rate": 9.841031239502856e-05, "loss": 1.4352, "step": 2100 }, { "epoch": 0.18, "grad_norm": 1.708284854888916, "learning_rate": 9.840191467920726e-05, "loss": 1.4434, "step": 2110 }, { "epoch": 0.18, "grad_norm": 1.4811726808547974, "learning_rate": 9.839351696338596e-05, "loss": 1.4244, "step": 2120 }, { "epoch": 0.18, "grad_norm": 1.6109236478805542, "learning_rate": 9.838511924756468e-05, "loss": 1.4156, "step": 2130 }, { "epoch": 0.18, 
"grad_norm": 1.6670873165130615, "learning_rate": 9.837672153174336e-05, "loss": 1.4482, "step": 2140 }, { "epoch": 0.18, "grad_norm": 1.3890584707260132, "learning_rate": 9.836832381592207e-05, "loss": 1.448, "step": 2150 }, { "epoch": 0.18, "grad_norm": 1.6003090143203735, "learning_rate": 9.835992610010078e-05, "loss": 1.446, "step": 2160 }, { "epoch": 0.18, "grad_norm": 1.4101319313049316, "learning_rate": 9.835152838427948e-05, "loss": 1.4429, "step": 2170 }, { "epoch": 0.18, "grad_norm": 1.7009485960006714, "learning_rate": 9.834313066845818e-05, "loss": 1.4242, "step": 2180 }, { "epoch": 0.18, "grad_norm": 1.4896700382232666, "learning_rate": 9.833473295263688e-05, "loss": 1.4165, "step": 2190 }, { "epoch": 0.18, "grad_norm": 1.4147573709487915, "learning_rate": 9.83263352368156e-05, "loss": 1.4139, "step": 2200 }, { "epoch": 0.19, "grad_norm": 1.4627562761306763, "learning_rate": 9.83179375209943e-05, "loss": 1.4183, "step": 2210 }, { "epoch": 0.19, "grad_norm": 1.4203674793243408, "learning_rate": 9.8309539805173e-05, "loss": 1.4039, "step": 2220 }, { "epoch": 0.19, "grad_norm": 1.2355053424835205, "learning_rate": 9.830114208935171e-05, "loss": 1.4043, "step": 2230 }, { "epoch": 0.19, "grad_norm": 1.4707199335098267, "learning_rate": 9.829274437353041e-05, "loss": 1.3782, "step": 2240 }, { "epoch": 0.19, "grad_norm": 1.3370139598846436, "learning_rate": 9.82843466577091e-05, "loss": 1.386, "step": 2250 }, { "epoch": 0.19, "grad_norm": 1.231668472290039, "learning_rate": 9.827594894188781e-05, "loss": 1.4013, "step": 2260 }, { "epoch": 0.19, "grad_norm": 1.7293065786361694, "learning_rate": 9.826755122606651e-05, "loss": 1.435, "step": 2270 }, { "epoch": 0.19, "grad_norm": 1.3585022687911987, "learning_rate": 9.825915351024521e-05, "loss": 1.407, "step": 2280 }, { "epoch": 0.19, "grad_norm": 1.3481025695800781, "learning_rate": 9.825075579442392e-05, "loss": 1.3815, "step": 2290 }, { "epoch": 0.19, "grad_norm": 1.437150239944458, "learning_rate": 
9.824235807860263e-05, "loss": 1.3801, "step": 2300 }, { "epoch": 0.19, "grad_norm": 1.405595302581787, "learning_rate": 9.823396036278133e-05, "loss": 1.3788, "step": 2310 }, { "epoch": 0.19, "grad_norm": 1.2758052349090576, "learning_rate": 9.822556264696003e-05, "loss": 1.3897, "step": 2320 }, { "epoch": 0.2, "grad_norm": 1.2883045673370361, "learning_rate": 9.821716493113875e-05, "loss": 1.4058, "step": 2330 }, { "epoch": 0.2, "grad_norm": 1.2251172065734863, "learning_rate": 9.820876721531745e-05, "loss": 1.3813, "step": 2340 }, { "epoch": 0.2, "grad_norm": 1.2864748239517212, "learning_rate": 9.820036949949613e-05, "loss": 1.4176, "step": 2350 }, { "epoch": 0.2, "grad_norm": 1.1874711513519287, "learning_rate": 9.819197178367485e-05, "loss": 1.4, "step": 2360 }, { "epoch": 0.2, "grad_norm": 1.185160517692566, "learning_rate": 9.818357406785355e-05, "loss": 1.3999, "step": 2370 }, { "epoch": 0.2, "grad_norm": 1.1715202331542969, "learning_rate": 9.817517635203225e-05, "loss": 1.3976, "step": 2380 }, { "epoch": 0.2, "grad_norm": 1.2971388101577759, "learning_rate": 9.816677863621095e-05, "loss": 1.3743, "step": 2390 }, { "epoch": 0.2, "grad_norm": 1.3223909139633179, "learning_rate": 9.815838092038966e-05, "loss": 1.4166, "step": 2400 }, { "epoch": 0.2, "grad_norm": 1.138414978981018, "learning_rate": 9.814998320456836e-05, "loss": 1.3848, "step": 2410 }, { "epoch": 0.2, "grad_norm": 1.0639983415603638, "learning_rate": 9.814158548874706e-05, "loss": 1.4086, "step": 2420 }, { "epoch": 0.2, "grad_norm": 1.1832982301712036, "learning_rate": 9.813318777292577e-05, "loss": 1.3755, "step": 2430 }, { "epoch": 0.2, "grad_norm": 1.1679545640945435, "learning_rate": 9.812479005710448e-05, "loss": 1.3657, "step": 2440 }, { "epoch": 0.21, "grad_norm": 1.3011316061019897, "learning_rate": 9.811639234128318e-05, "loss": 1.3952, "step": 2450 }, { "epoch": 0.21, "grad_norm": 1.189110279083252, "learning_rate": 9.810799462546187e-05, "loss": 1.3809, "step": 2460 }, { "epoch": 
0.21, "grad_norm": 1.1846518516540527, "learning_rate": 9.809959690964058e-05, "loss": 1.37, "step": 2470 }, { "epoch": 0.21, "grad_norm": 1.091399908065796, "learning_rate": 9.809119919381928e-05, "loss": 1.3758, "step": 2480 }, { "epoch": 0.21, "grad_norm": 1.2157222032546997, "learning_rate": 9.808280147799798e-05, "loss": 1.3915, "step": 2490 }, { "epoch": 0.21, "grad_norm": 1.0647687911987305, "learning_rate": 9.80744037621767e-05, "loss": 1.3691, "step": 2500 }, { "epoch": 0.21, "grad_norm": 1.2293319702148438, "learning_rate": 9.80660060463554e-05, "loss": 1.3363, "step": 2510 }, { "epoch": 0.21, "grad_norm": 0.9640077948570251, "learning_rate": 9.80576083305341e-05, "loss": 1.3572, "step": 2520 }, { "epoch": 0.21, "grad_norm": 1.1838915348052979, "learning_rate": 9.80492106147128e-05, "loss": 1.3949, "step": 2530 }, { "epoch": 0.21, "grad_norm": 1.2546766996383667, "learning_rate": 9.804081289889151e-05, "loss": 1.3735, "step": 2540 }, { "epoch": 0.21, "grad_norm": 1.21607506275177, "learning_rate": 9.803241518307021e-05, "loss": 1.3972, "step": 2550 }, { "epoch": 0.21, "grad_norm": 1.1871435642242432, "learning_rate": 9.80240174672489e-05, "loss": 1.3574, "step": 2560 }, { "epoch": 0.22, "grad_norm": 1.048129677772522, "learning_rate": 9.801561975142762e-05, "loss": 1.3403, "step": 2570 }, { "epoch": 0.22, "grad_norm": 1.2536293268203735, "learning_rate": 9.800722203560632e-05, "loss": 1.3687, "step": 2580 }, { "epoch": 0.22, "grad_norm": 1.1996736526489258, "learning_rate": 9.799882431978502e-05, "loss": 1.3523, "step": 2590 }, { "epoch": 0.22, "grad_norm": 1.1699773073196411, "learning_rate": 9.799042660396373e-05, "loss": 1.33, "step": 2600 }, { "epoch": 0.22, "grad_norm": 1.3010507822036743, "learning_rate": 9.798202888814243e-05, "loss": 1.3843, "step": 2610 }, { "epoch": 0.22, "grad_norm": 1.1527621746063232, "learning_rate": 9.797363117232113e-05, "loss": 1.3588, "step": 2620 }, { "epoch": 0.22, "grad_norm": 1.1328866481781006, "learning_rate": 
9.796523345649983e-05, "loss": 1.3437, "step": 2630 }, { "epoch": 0.22, "grad_norm": 1.090882658958435, "learning_rate": 9.795683574067855e-05, "loss": 1.3776, "step": 2640 }, { "epoch": 0.22, "grad_norm": 1.256652593612671, "learning_rate": 9.794843802485725e-05, "loss": 1.3196, "step": 2650 }, { "epoch": 0.22, "grad_norm": 1.0616453886032104, "learning_rate": 9.794004030903595e-05, "loss": 1.341, "step": 2660 }, { "epoch": 0.22, "grad_norm": 1.085680365562439, "learning_rate": 9.793164259321465e-05, "loss": 1.3547, "step": 2670 }, { "epoch": 0.22, "grad_norm": 1.0737241506576538, "learning_rate": 9.792324487739335e-05, "loss": 1.3835, "step": 2680 }, { "epoch": 0.23, "grad_norm": 1.245523452758789, "learning_rate": 9.791484716157205e-05, "loss": 1.3737, "step": 2690 }, { "epoch": 0.23, "grad_norm": 1.0836365222930908, "learning_rate": 9.790644944575075e-05, "loss": 1.3777, "step": 2700 }, { "epoch": 0.23, "grad_norm": 1.1912739276885986, "learning_rate": 9.789805172992947e-05, "loss": 1.3563, "step": 2710 }, { "epoch": 0.23, "grad_norm": 1.2736655473709106, "learning_rate": 9.788965401410817e-05, "loss": 1.3194, "step": 2720 }, { "epoch": 0.23, "grad_norm": 1.104282021522522, "learning_rate": 9.788125629828687e-05, "loss": 1.3479, "step": 2730 }, { "epoch": 0.23, "grad_norm": 1.048940658569336, "learning_rate": 9.787285858246558e-05, "loss": 1.3532, "step": 2740 }, { "epoch": 0.23, "grad_norm": 1.0983320474624634, "learning_rate": 9.786446086664428e-05, "loss": 1.3562, "step": 2750 }, { "epoch": 0.23, "grad_norm": 1.1402326822280884, "learning_rate": 9.785606315082298e-05, "loss": 1.354, "step": 2760 }, { "epoch": 0.23, "grad_norm": 1.2619010210037231, "learning_rate": 9.784766543500168e-05, "loss": 1.3521, "step": 2770 }, { "epoch": 0.23, "grad_norm": 1.0591953992843628, "learning_rate": 9.783926771918038e-05, "loss": 1.3789, "step": 2780 }, { "epoch": 0.23, "grad_norm": 1.1339819431304932, "learning_rate": 9.783087000335909e-05, "loss": 1.3256, "step": 2790 }, 
{ "epoch": 0.23, "grad_norm": 1.1497844457626343, "learning_rate": 9.782247228753779e-05, "loss": 1.3132, "step": 2800 }, { "epoch": 0.24, "grad_norm": 1.1759591102600098, "learning_rate": 9.78140745717165e-05, "loss": 1.3177, "step": 2810 }, { "epoch": 0.24, "grad_norm": 0.9412186145782471, "learning_rate": 9.78056768558952e-05, "loss": 1.3229, "step": 2820 }, { "epoch": 0.24, "grad_norm": 0.9941621422767639, "learning_rate": 9.77972791400739e-05, "loss": 1.3377, "step": 2830 }, { "epoch": 0.24, "grad_norm": 1.157085657119751, "learning_rate": 9.778888142425262e-05, "loss": 1.3507, "step": 2840 }, { "epoch": 0.24, "grad_norm": 1.0099543333053589, "learning_rate": 9.778048370843132e-05, "loss": 1.3161, "step": 2850 }, { "epoch": 0.24, "grad_norm": 1.0422873497009277, "learning_rate": 9.777208599261002e-05, "loss": 1.333, "step": 2860 }, { "epoch": 0.24, "grad_norm": 0.9914569854736328, "learning_rate": 9.776368827678872e-05, "loss": 1.3464, "step": 2870 }, { "epoch": 0.24, "grad_norm": 1.021345853805542, "learning_rate": 9.775529056096742e-05, "loss": 1.3211, "step": 2880 }, { "epoch": 0.24, "grad_norm": 1.109337568283081, "learning_rate": 9.774689284514612e-05, "loss": 1.3276, "step": 2890 }, { "epoch": 0.24, "grad_norm": 1.103621482849121, "learning_rate": 9.773849512932482e-05, "loss": 1.3461, "step": 2900 }, { "epoch": 0.24, "grad_norm": 0.9739851355552673, "learning_rate": 9.773009741350353e-05, "loss": 1.3455, "step": 2910 }, { "epoch": 0.24, "grad_norm": 1.0525223016738892, "learning_rate": 9.772169969768223e-05, "loss": 1.3217, "step": 2920 }, { "epoch": 0.25, "grad_norm": 1.0851151943206787, "learning_rate": 9.771330198186094e-05, "loss": 1.3086, "step": 2930 }, { "epoch": 0.25, "grad_norm": 0.9990787506103516, "learning_rate": 9.770490426603964e-05, "loss": 1.3356, "step": 2940 }, { "epoch": 0.25, "grad_norm": 0.95279461145401, "learning_rate": 9.769650655021835e-05, "loss": 1.3354, "step": 2950 }, { "epoch": 0.25, "grad_norm": 1.021781325340271, 
"learning_rate": 9.768810883439705e-05, "loss": 1.345, "step": 2960 }, { "epoch": 0.25, "grad_norm": 1.0377811193466187, "learning_rate": 9.767971111857575e-05, "loss": 1.336, "step": 2970 }, { "epoch": 0.25, "grad_norm": 1.1588681936264038, "learning_rate": 9.767131340275447e-05, "loss": 1.3243, "step": 2980 }, { "epoch": 0.25, "grad_norm": 1.020140528678894, "learning_rate": 9.766291568693315e-05, "loss": 1.3228, "step": 2990 }, { "epoch": 0.25, "grad_norm": 1.0705044269561768, "learning_rate": 9.765451797111185e-05, "loss": 1.3296, "step": 3000 }, { "epoch": 0.25, "grad_norm": 1.0218137502670288, "learning_rate": 9.764612025529057e-05, "loss": 1.3411, "step": 3010 }, { "epoch": 0.25, "grad_norm": 0.9504896402359009, "learning_rate": 9.763772253946927e-05, "loss": 1.3453, "step": 3020 }, { "epoch": 0.25, "grad_norm": 0.9712496399879456, "learning_rate": 9.762932482364797e-05, "loss": 1.3374, "step": 3030 }, { "epoch": 0.25, "grad_norm": 1.0498982667922974, "learning_rate": 9.762092710782667e-05, "loss": 1.3487, "step": 3040 }, { "epoch": 0.26, "grad_norm": 1.003515601158142, "learning_rate": 9.761252939200538e-05, "loss": 1.3378, "step": 3050 }, { "epoch": 0.26, "grad_norm": 1.0123205184936523, "learning_rate": 9.760413167618409e-05, "loss": 1.3169, "step": 3060 }, { "epoch": 0.26, "grad_norm": 1.0419187545776367, "learning_rate": 9.759573396036279e-05, "loss": 1.2904, "step": 3070 }, { "epoch": 0.26, "grad_norm": 1.0047117471694946, "learning_rate": 9.75873362445415e-05, "loss": 1.3109, "step": 3080 }, { "epoch": 0.26, "grad_norm": 1.0547492504119873, "learning_rate": 9.757893852872019e-05, "loss": 1.3664, "step": 3090 }, { "epoch": 0.26, "grad_norm": 1.0447371006011963, "learning_rate": 9.757054081289889e-05, "loss": 1.3109, "step": 3100 }, { "epoch": 0.26, "grad_norm": 0.9989799857139587, "learning_rate": 9.75621430970776e-05, "loss": 1.3102, "step": 3110 }, { "epoch": 0.26, "grad_norm": 1.0217671394348145, "learning_rate": 9.75537453812563e-05, "loss": 1.329, 
"step": 3120 }, { "epoch": 0.26, "grad_norm": 1.0475112199783325, "learning_rate": 9.7545347665435e-05, "loss": 1.3207, "step": 3130 }, { "epoch": 0.26, "grad_norm": 1.1760674715042114, "learning_rate": 9.75369499496137e-05, "loss": 1.3253, "step": 3140 }, { "epoch": 0.26, "grad_norm": 1.0421987771987915, "learning_rate": 9.752855223379242e-05, "loss": 1.3213, "step": 3150 }, { "epoch": 0.26, "grad_norm": 1.071460247039795, "learning_rate": 9.752015451797112e-05, "loss": 1.3033, "step": 3160 }, { "epoch": 0.27, "grad_norm": 1.061184287071228, "learning_rate": 9.751175680214982e-05, "loss": 1.327, "step": 3170 }, { "epoch": 0.27, "grad_norm": 0.9795036315917969, "learning_rate": 9.750335908632852e-05, "loss": 1.3328, "step": 3180 }, { "epoch": 0.27, "grad_norm": 1.0202151536941528, "learning_rate": 9.749496137050723e-05, "loss": 1.3191, "step": 3190 }, { "epoch": 0.27, "grad_norm": 0.9541215896606445, "learning_rate": 9.748656365468592e-05, "loss": 1.3145, "step": 3200 }, { "epoch": 0.27, "grad_norm": 0.9995015263557434, "learning_rate": 9.747816593886464e-05, "loss": 1.3217, "step": 3210 }, { "epoch": 0.27, "grad_norm": 1.1494383811950684, "learning_rate": 9.746976822304334e-05, "loss": 1.3004, "step": 3220 }, { "epoch": 0.27, "grad_norm": 1.0284998416900635, "learning_rate": 9.746137050722204e-05, "loss": 1.3176, "step": 3230 }, { "epoch": 0.27, "grad_norm": 1.1794145107269287, "learning_rate": 9.745297279140074e-05, "loss": 1.3441, "step": 3240 }, { "epoch": 0.27, "grad_norm": 0.961778461933136, "learning_rate": 9.744457507557945e-05, "loss": 1.3103, "step": 3250 }, { "epoch": 0.27, "grad_norm": 0.9705653786659241, "learning_rate": 9.743617735975815e-05, "loss": 1.3338, "step": 3260 }, { "epoch": 0.27, "grad_norm": 0.991642951965332, "learning_rate": 9.742777964393685e-05, "loss": 1.3029, "step": 3270 }, { "epoch": 0.27, "grad_norm": 1.0278716087341309, "learning_rate": 9.741938192811555e-05, "loss": 1.3048, "step": 3280 }, { "epoch": 0.28, "grad_norm": 
0.9765398502349854, "learning_rate": 9.741098421229427e-05, "loss": 1.3168, "step": 3290 }, { "epoch": 0.28, "grad_norm": 0.9255518913269043, "learning_rate": 9.740258649647297e-05, "loss": 1.3615, "step": 3300 }, { "epoch": 0.28, "grad_norm": 0.9744580388069153, "learning_rate": 9.739418878065166e-05, "loss": 1.3185, "step": 3310 }, { "epoch": 0.28, "grad_norm": 1.084482192993164, "learning_rate": 9.738579106483037e-05, "loss": 1.3495, "step": 3320 }, { "epoch": 0.28, "grad_norm": 1.2292135953903198, "learning_rate": 9.737739334900907e-05, "loss": 1.3105, "step": 3330 }, { "epoch": 0.28, "grad_norm": 0.961018443107605, "learning_rate": 9.736899563318777e-05, "loss": 1.3035, "step": 3340 }, { "epoch": 0.28, "grad_norm": 1.075959324836731, "learning_rate": 9.736059791736649e-05, "loss": 1.3054, "step": 3350 }, { "epoch": 0.28, "grad_norm": 1.0361067056655884, "learning_rate": 9.735220020154519e-05, "loss": 1.2918, "step": 3360 }, { "epoch": 0.28, "grad_norm": 0.9005823731422424, "learning_rate": 9.734380248572389e-05, "loss": 1.3137, "step": 3370 }, { "epoch": 0.28, "grad_norm": 0.9153285026550293, "learning_rate": 9.733540476990259e-05, "loss": 1.3288, "step": 3380 }, { "epoch": 0.28, "grad_norm": 0.8635299205780029, "learning_rate": 9.73270070540813e-05, "loss": 1.3137, "step": 3390 }, { "epoch": 0.29, "grad_norm": 1.0367728471755981, "learning_rate": 9.731860933826e-05, "loss": 1.2876, "step": 3400 }, { "epoch": 0.29, "grad_norm": 0.9176615476608276, "learning_rate": 9.731021162243869e-05, "loss": 1.3387, "step": 3410 }, { "epoch": 0.29, "grad_norm": 1.0236858129501343, "learning_rate": 9.73018139066174e-05, "loss": 1.3155, "step": 3420 }, { "epoch": 0.29, "grad_norm": 0.9770137071609497, "learning_rate": 9.72934161907961e-05, "loss": 1.3125, "step": 3430 }, { "epoch": 0.29, "grad_norm": 0.9246246814727783, "learning_rate": 9.72850184749748e-05, "loss": 1.336, "step": 3440 }, { "epoch": 0.29, "grad_norm": 0.9181159138679504, "learning_rate": 
9.727662075915352e-05, "loss": 1.308, "step": 3450 }, { "epoch": 0.29, "grad_norm": 1.0865639448165894, "learning_rate": 9.726822304333222e-05, "loss": 1.301, "step": 3460 }, { "epoch": 0.29, "grad_norm": 0.8808737993240356, "learning_rate": 9.725982532751092e-05, "loss": 1.2944, "step": 3470 }, { "epoch": 0.29, "grad_norm": 0.91978919506073, "learning_rate": 9.725142761168962e-05, "loss": 1.2873, "step": 3480 }, { "epoch": 0.29, "grad_norm": 0.9652817845344543, "learning_rate": 9.724302989586834e-05, "loss": 1.2933, "step": 3490 }, { "epoch": 0.29, "grad_norm": 0.8826923966407776, "learning_rate": 9.723463218004704e-05, "loss": 1.3111, "step": 3500 }, { "epoch": 0.29, "grad_norm": 0.9343136548995972, "learning_rate": 9.722623446422574e-05, "loss": 1.3041, "step": 3510 }, { "epoch": 0.3, "grad_norm": 0.9622355699539185, "learning_rate": 9.721783674840444e-05, "loss": 1.3273, "step": 3520 }, { "epoch": 0.3, "grad_norm": 0.8856210112571716, "learning_rate": 9.720943903258314e-05, "loss": 1.308, "step": 3530 }, { "epoch": 0.3, "grad_norm": 1.0706381797790527, "learning_rate": 9.720104131676184e-05, "loss": 1.2895, "step": 3540 }, { "epoch": 0.3, "grad_norm": 0.9392056465148926, "learning_rate": 9.719264360094054e-05, "loss": 1.3051, "step": 3550 }, { "epoch": 0.3, "grad_norm": 0.9185264110565186, "learning_rate": 9.718424588511926e-05, "loss": 1.3149, "step": 3560 }, { "epoch": 0.3, "grad_norm": 0.9602808356285095, "learning_rate": 9.717584816929796e-05, "loss": 1.3119, "step": 3570 }, { "epoch": 0.3, "grad_norm": 0.9972320795059204, "learning_rate": 9.716745045347666e-05, "loss": 1.2968, "step": 3580 }, { "epoch": 0.3, "grad_norm": 0.9500466585159302, "learning_rate": 9.715905273765537e-05, "loss": 1.3258, "step": 3590 }, { "epoch": 0.3, "grad_norm": 1.031611680984497, "learning_rate": 9.715065502183407e-05, "loss": 1.2904, "step": 3600 }, { "epoch": 0.3, "grad_norm": 0.9221827387809753, "learning_rate": 9.714225730601277e-05, "loss": 1.2762, "step": 3610 }, { 
"epoch": 0.3, "grad_norm": 1.0512107610702515, "learning_rate": 9.713385959019147e-05, "loss": 1.3046, "step": 3620 }, { "epoch": 0.3, "grad_norm": 0.9803313612937927, "learning_rate": 9.712546187437017e-05, "loss": 1.3, "step": 3630 }, { "epoch": 0.31, "grad_norm": 0.8865204453468323, "learning_rate": 9.711706415854887e-05, "loss": 1.3094, "step": 3640 }, { "epoch": 0.31, "grad_norm": 1.0050348043441772, "learning_rate": 9.710866644272758e-05, "loss": 1.3196, "step": 3650 }, { "epoch": 0.31, "grad_norm": 0.8442556262016296, "learning_rate": 9.710026872690629e-05, "loss": 1.3467, "step": 3660 }, { "epoch": 0.31, "grad_norm": 0.9233782887458801, "learning_rate": 9.709187101108499e-05, "loss": 1.2706, "step": 3670 }, { "epoch": 0.31, "grad_norm": 0.9729004502296448, "learning_rate": 9.708347329526369e-05, "loss": 1.336, "step": 3680 }, { "epoch": 0.31, "grad_norm": 0.9508803486824036, "learning_rate": 9.70750755794424e-05, "loss": 1.3274, "step": 3690 }, { "epoch": 0.31, "grad_norm": 0.9443732500076294, "learning_rate": 9.70666778636211e-05, "loss": 1.2841, "step": 3700 }, { "epoch": 0.31, "grad_norm": 0.868013322353363, "learning_rate": 9.70582801477998e-05, "loss": 1.2957, "step": 3710 }, { "epoch": 0.31, "grad_norm": 0.965334415435791, "learning_rate": 9.704988243197851e-05, "loss": 1.3304, "step": 3720 }, { "epoch": 0.31, "grad_norm": 0.9084999561309814, "learning_rate": 9.704148471615721e-05, "loss": 1.2923, "step": 3730 }, { "epoch": 0.31, "grad_norm": 0.919133722782135, "learning_rate": 9.703308700033591e-05, "loss": 1.29, "step": 3740 }, { "epoch": 0.31, "grad_norm": 0.9951848983764648, "learning_rate": 9.702468928451461e-05, "loss": 1.259, "step": 3750 }, { "epoch": 0.32, "grad_norm": 0.9185649752616882, "learning_rate": 9.701629156869332e-05, "loss": 1.2836, "step": 3760 }, { "epoch": 0.32, "grad_norm": 1.054055094718933, "learning_rate": 9.700789385287202e-05, "loss": 1.3071, "step": 3770 }, { "epoch": 0.32, "grad_norm": 0.8889206051826477, 
"learning_rate": 9.699949613705072e-05, "loss": 1.288, "step": 3780 }, { "epoch": 0.32, "grad_norm": 1.0557209253311157, "learning_rate": 9.699109842122943e-05, "loss": 1.2997, "step": 3790 }, { "epoch": 0.32, "grad_norm": 0.9376937747001648, "learning_rate": 9.698270070540814e-05, "loss": 1.2725, "step": 3800 }, { "epoch": 0.32, "grad_norm": 0.9468439817428589, "learning_rate": 9.697430298958684e-05, "loss": 1.2995, "step": 3810 }, { "epoch": 0.32, "grad_norm": 0.9373131394386292, "learning_rate": 9.696590527376554e-05, "loss": 1.2954, "step": 3820 }, { "epoch": 0.32, "grad_norm": 0.9124709367752075, "learning_rate": 9.695750755794426e-05, "loss": 1.2801, "step": 3830 }, { "epoch": 0.32, "grad_norm": 0.8868261575698853, "learning_rate": 9.694910984212294e-05, "loss": 1.3157, "step": 3840 }, { "epoch": 0.32, "grad_norm": 0.9589577317237854, "learning_rate": 9.694071212630164e-05, "loss": 1.2965, "step": 3850 }, { "epoch": 0.32, "grad_norm": 3.692502737045288, "learning_rate": 9.693231441048036e-05, "loss": 1.2856, "step": 3860 }, { "epoch": 0.32, "grad_norm": 1.1974432468414307, "learning_rate": 9.692391669465906e-05, "loss": 1.297, "step": 3870 }, { "epoch": 0.33, "grad_norm": 1.018686294555664, "learning_rate": 9.691551897883776e-05, "loss": 1.2363, "step": 3880 }, { "epoch": 0.33, "grad_norm": 0.9238420128822327, "learning_rate": 9.690712126301646e-05, "loss": 1.3077, "step": 3890 }, { "epoch": 0.33, "grad_norm": 1.0184767246246338, "learning_rate": 9.689872354719517e-05, "loss": 1.2869, "step": 3900 }, { "epoch": 0.33, "grad_norm": 1.2680937051773071, "learning_rate": 9.689032583137387e-05, "loss": 1.2759, "step": 3910 }, { "epoch": 0.33, "grad_norm": 1.0533655881881714, "learning_rate": 9.688192811555257e-05, "loss": 1.3035, "step": 3920 }, { "epoch": 0.33, "grad_norm": 1.1051759719848633, "learning_rate": 9.687353039973129e-05, "loss": 1.2891, "step": 3930 }, { "epoch": 0.33, "grad_norm": 1.1200437545776367, "learning_rate": 9.686513268390998e-05, "loss": 
1.2887, "step": 3940 }, { "epoch": 0.33, "grad_norm": 1.049575686454773, "learning_rate": 9.685673496808868e-05, "loss": 1.2664, "step": 3950 }, { "epoch": 0.33, "grad_norm": 0.9664704203605652, "learning_rate": 9.684833725226739e-05, "loss": 1.2763, "step": 3960 }, { "epoch": 0.33, "grad_norm": 0.984643280506134, "learning_rate": 9.683993953644609e-05, "loss": 1.3093, "step": 3970 }, { "epoch": 0.33, "grad_norm": 1.1789389848709106, "learning_rate": 9.683154182062479e-05, "loss": 1.3181, "step": 3980 }, { "epoch": 0.33, "grad_norm": 0.9238845109939575, "learning_rate": 9.68231441048035e-05, "loss": 1.2999, "step": 3990 }, { "epoch": 0.34, "grad_norm": 0.9249722957611084, "learning_rate": 9.681474638898221e-05, "loss": 1.278, "step": 4000 }, { "epoch": 0.34, "grad_norm": 0.9088603258132935, "learning_rate": 9.680634867316091e-05, "loss": 1.2853, "step": 4010 }, { "epoch": 0.34, "grad_norm": 0.9020570516586304, "learning_rate": 9.679795095733961e-05, "loss": 1.289, "step": 4020 }, { "epoch": 0.34, "grad_norm": 0.940483570098877, "learning_rate": 9.678955324151831e-05, "loss": 1.2916, "step": 4030 }, { "epoch": 0.34, "grad_norm": 0.9913098812103271, "learning_rate": 9.678115552569702e-05, "loss": 1.2713, "step": 4040 }, { "epoch": 0.34, "grad_norm": 0.9511467218399048, "learning_rate": 9.677275780987571e-05, "loss": 1.2768, "step": 4050 }, { "epoch": 0.34, "grad_norm": 0.9045311212539673, "learning_rate": 9.676436009405441e-05, "loss": 1.2752, "step": 4060 }, { "epoch": 0.34, "grad_norm": 0.8729120492935181, "learning_rate": 9.675596237823313e-05, "loss": 1.2921, "step": 4070 }, { "epoch": 0.34, "grad_norm": 0.8952471613883972, "learning_rate": 9.674756466241183e-05, "loss": 1.2502, "step": 4080 }, { "epoch": 0.34, "grad_norm": 0.986374020576477, "learning_rate": 9.673916694659053e-05, "loss": 1.2775, "step": 4090 }, { "epoch": 0.34, "grad_norm": 0.8978623747825623, "learning_rate": 9.673076923076924e-05, "loss": 1.3128, "step": 4100 }, { "epoch": 0.34, "grad_norm": 
0.8826311826705933, "learning_rate": 9.672237151494794e-05, "loss": 1.297, "step": 4110 }, { "epoch": 0.35, "grad_norm": 0.96685391664505, "learning_rate": 9.671397379912664e-05, "loss": 1.2506, "step": 4120 }, { "epoch": 0.35, "grad_norm": 0.9718626141548157, "learning_rate": 9.670557608330534e-05, "loss": 1.2876, "step": 4130 }, { "epoch": 0.35, "grad_norm": 0.8737813830375671, "learning_rate": 9.669717836748406e-05, "loss": 1.2789, "step": 4140 }, { "epoch": 0.35, "grad_norm": 0.9341663122177124, "learning_rate": 9.668878065166275e-05, "loss": 1.2711, "step": 4150 }, { "epoch": 0.35, "grad_norm": 0.970474123954773, "learning_rate": 9.668038293584145e-05, "loss": 1.2909, "step": 4160 }, { "epoch": 0.35, "grad_norm": 0.8914270401000977, "learning_rate": 9.667198522002016e-05, "loss": 1.2858, "step": 4170 }, { "epoch": 0.35, "grad_norm": 0.9272024631500244, "learning_rate": 9.666358750419886e-05, "loss": 1.2892, "step": 4180 }, { "epoch": 0.35, "grad_norm": 1.0669282674789429, "learning_rate": 9.665518978837756e-05, "loss": 1.2738, "step": 4190 }, { "epoch": 0.35, "grad_norm": 1.016957402229309, "learning_rate": 9.664679207255628e-05, "loss": 1.2725, "step": 4200 }, { "epoch": 0.35, "grad_norm": 0.8794147372245789, "learning_rate": 9.663839435673498e-05, "loss": 1.3021, "step": 4210 }, { "epoch": 0.35, "grad_norm": 0.9839082956314087, "learning_rate": 9.662999664091368e-05, "loss": 1.2664, "step": 4220 }, { "epoch": 0.35, "grad_norm": 0.9609381556510925, "learning_rate": 9.662159892509238e-05, "loss": 1.2475, "step": 4230 }, { "epoch": 0.36, "grad_norm": 0.9309755563735962, "learning_rate": 9.661320120927109e-05, "loss": 1.291, "step": 4240 }, { "epoch": 0.36, "grad_norm": 0.9350253343582153, "learning_rate": 9.660480349344979e-05, "loss": 1.2724, "step": 4250 }, { "epoch": 0.36, "grad_norm": 0.842782735824585, "learning_rate": 9.659640577762848e-05, "loss": 1.2922, "step": 4260 }, { "epoch": 0.36, "grad_norm": 0.9164971709251404, "learning_rate": 
9.65880080618072e-05, "loss": 1.2648, "step": 4270 }, { "epoch": 0.36, "grad_norm": 0.8968715071678162, "learning_rate": 9.65796103459859e-05, "loss": 1.27, "step": 4280 }, { "epoch": 0.36, "grad_norm": 0.9750767946243286, "learning_rate": 9.65712126301646e-05, "loss": 1.2772, "step": 4290 }, { "epoch": 0.36, "grad_norm": 0.8595716953277588, "learning_rate": 9.65628149143433e-05, "loss": 1.272, "step": 4300 }, { "epoch": 0.36, "grad_norm": 0.9079431295394897, "learning_rate": 9.655441719852201e-05, "loss": 1.2988, "step": 4310 }, { "epoch": 0.36, "grad_norm": 0.8873557448387146, "learning_rate": 9.654601948270071e-05, "loss": 1.2621, "step": 4320 }, { "epoch": 0.36, "grad_norm": 0.9525663256645203, "learning_rate": 9.653762176687941e-05, "loss": 1.304, "step": 4330 }, { "epoch": 0.36, "grad_norm": 0.9584593176841736, "learning_rate": 9.652922405105813e-05, "loss": 1.2749, "step": 4340 }, { "epoch": 0.36, "grad_norm": 0.9332358837127686, "learning_rate": 9.652082633523683e-05, "loss": 1.2664, "step": 4350 }, { "epoch": 0.37, "grad_norm": 0.9320657849311829, "learning_rate": 9.651242861941551e-05, "loss": 1.2689, "step": 4360 }, { "epoch": 0.37, "grad_norm": 0.9525415301322937, "learning_rate": 9.650403090359423e-05, "loss": 1.2391, "step": 4370 }, { "epoch": 0.37, "grad_norm": 0.9248800873756409, "learning_rate": 9.649563318777293e-05, "loss": 1.2483, "step": 4380 }, { "epoch": 0.37, "grad_norm": 0.9141398072242737, "learning_rate": 9.648723547195163e-05, "loss": 1.2667, "step": 4390 }, { "epoch": 0.37, "grad_norm": 1.0400909185409546, "learning_rate": 9.647883775613033e-05, "loss": 1.2725, "step": 4400 }, { "epoch": 0.37, "grad_norm": 0.8996667265892029, "learning_rate": 9.647044004030904e-05, "loss": 1.2561, "step": 4410 }, { "epoch": 0.37, "grad_norm": 0.9660255908966064, "learning_rate": 9.646204232448774e-05, "loss": 1.2951, "step": 4420 }, { "epoch": 0.37, "grad_norm": 0.8532864451408386, "learning_rate": 9.645364460866645e-05, "loss": 1.2612, "step": 4430 }, 
{ "epoch": 0.37, "grad_norm": 0.8848567605018616, "learning_rate": 9.644524689284516e-05, "loss": 1.2857, "step": 4440 }, { "epoch": 0.37, "grad_norm": 0.9615711569786072, "learning_rate": 9.643684917702386e-05, "loss": 1.2464, "step": 4450 }, { "epoch": 0.37, "grad_norm": 0.8904231786727905, "learning_rate": 9.642845146120256e-05, "loss": 1.2728, "step": 4460 }, { "epoch": 0.37, "grad_norm": 0.8821005821228027, "learning_rate": 9.642005374538126e-05, "loss": 1.2547, "step": 4470 }, { "epoch": 0.38, "grad_norm": 0.9640454649925232, "learning_rate": 9.641165602955996e-05, "loss": 1.2678, "step": 4480 }, { "epoch": 0.38, "grad_norm": 0.9105495810508728, "learning_rate": 9.640325831373866e-05, "loss": 1.292, "step": 4490 }, { "epoch": 0.38, "grad_norm": 0.8753247261047363, "learning_rate": 9.639486059791736e-05, "loss": 1.2948, "step": 4500 }, { "epoch": 0.38, "grad_norm": 0.9737322926521301, "learning_rate": 9.638646288209608e-05, "loss": 1.2815, "step": 4510 }, { "epoch": 0.38, "grad_norm": 0.990397572517395, "learning_rate": 9.637806516627478e-05, "loss": 1.2795, "step": 4520 }, { "epoch": 0.38, "grad_norm": 0.8985497951507568, "learning_rate": 9.636966745045348e-05, "loss": 1.2681, "step": 4530 }, { "epoch": 0.38, "grad_norm": 0.9467973709106445, "learning_rate": 9.636126973463218e-05, "loss": 1.2363, "step": 4540 }, { "epoch": 0.38, "grad_norm": 0.8291370868682861, "learning_rate": 9.63528720188109e-05, "loss": 1.2906, "step": 4550 }, { "epoch": 0.38, "grad_norm": 0.9621056914329529, "learning_rate": 9.63444743029896e-05, "loss": 1.2666, "step": 4560 }, { "epoch": 0.38, "grad_norm": 0.9514476656913757, "learning_rate": 9.63360765871683e-05, "loss": 1.2506, "step": 4570 }, { "epoch": 0.38, "grad_norm": 1.0280085802078247, "learning_rate": 9.6327678871347e-05, "loss": 1.2565, "step": 4580 }, { "epoch": 0.38, "grad_norm": 0.8504949808120728, "learning_rate": 9.63192811555257e-05, "loss": 1.2685, "step": 4590 }, { "epoch": 0.39, "grad_norm": 1.6871306896209717, 
"learning_rate": 9.63108834397044e-05, "loss": 1.2822, "step": 4600 }, { "epoch": 0.39, "grad_norm": 0.9096508026123047, "learning_rate": 9.630248572388311e-05, "loss": 1.2605, "step": 4610 }, { "epoch": 0.39, "grad_norm": 0.9401909112930298, "learning_rate": 9.629408800806181e-05, "loss": 1.2963, "step": 4620 }, { "epoch": 0.39, "grad_norm": 0.9390792846679688, "learning_rate": 9.628569029224051e-05, "loss": 1.2491, "step": 4630 }, { "epoch": 0.39, "grad_norm": 0.9321569204330444, "learning_rate": 9.627729257641921e-05, "loss": 1.2379, "step": 4640 }, { "epoch": 0.39, "grad_norm": 0.8815140724182129, "learning_rate": 9.626889486059793e-05, "loss": 1.2647, "step": 4650 }, { "epoch": 0.39, "grad_norm": 0.9437508583068848, "learning_rate": 9.626049714477663e-05, "loss": 1.2941, "step": 4660 }, { "epoch": 0.39, "grad_norm": 0.9065150618553162, "learning_rate": 9.625209942895533e-05, "loss": 1.2586, "step": 4670 }, { "epoch": 0.39, "grad_norm": 0.897009551525116, "learning_rate": 9.624370171313403e-05, "loss": 1.2517, "step": 4680 }, { "epoch": 0.39, "grad_norm": 0.9548740983009338, "learning_rate": 9.623530399731273e-05, "loss": 1.2523, "step": 4690 }, { "epoch": 0.39, "grad_norm": 0.9641335606575012, "learning_rate": 9.622690628149143e-05, "loss": 1.2527, "step": 4700 }, { "epoch": 0.39, "grad_norm": 0.9327057600021362, "learning_rate": 9.621850856567015e-05, "loss": 1.2544, "step": 4710 }, { "epoch": 0.4, "grad_norm": 0.8007282018661499, "learning_rate": 9.621011084984885e-05, "loss": 1.2438, "step": 4720 }, { "epoch": 0.4, "grad_norm": 0.9581779837608337, "learning_rate": 9.620171313402755e-05, "loss": 1.2542, "step": 4730 }, { "epoch": 0.4, "grad_norm": 0.8529838919639587, "learning_rate": 9.619331541820625e-05, "loss": 1.2661, "step": 4740 }, { "epoch": 0.4, "grad_norm": 0.8764471411705017, "learning_rate": 9.618491770238496e-05, "loss": 1.2458, "step": 4750 }, { "epoch": 0.4, "grad_norm": 0.8999229669570923, "learning_rate": 9.617651998656366e-05, "loss": 
1.2775, "step": 4760 }, { "epoch": 0.4, "grad_norm": 0.8830211758613586, "learning_rate": 9.616812227074236e-05, "loss": 1.2525, "step": 4770 }, { "epoch": 0.4, "grad_norm": 0.8796391487121582, "learning_rate": 9.615972455492106e-05, "loss": 1.2651, "step": 4780 }, { "epoch": 0.4, "grad_norm": 1.0258424282073975, "learning_rate": 9.615132683909977e-05, "loss": 1.2467, "step": 4790 }, { "epoch": 0.4, "grad_norm": 0.9051372408866882, "learning_rate": 9.614292912327847e-05, "loss": 1.251, "step": 4800 }, { "epoch": 0.4, "grad_norm": 0.8889206051826477, "learning_rate": 9.613453140745717e-05, "loss": 1.2472, "step": 4810 }, { "epoch": 0.4, "grad_norm": 0.8488065600395203, "learning_rate": 9.612613369163588e-05, "loss": 1.2444, "step": 4820 }, { "epoch": 0.4, "grad_norm": 0.8944873213768005, "learning_rate": 9.611773597581458e-05, "loss": 1.2504, "step": 4830 }, { "epoch": 0.41, "grad_norm": 0.8767454028129578, "learning_rate": 9.610933825999328e-05, "loss": 1.2464, "step": 4840 }, { "epoch": 0.41, "grad_norm": 0.8748544454574585, "learning_rate": 9.6100940544172e-05, "loss": 1.2676, "step": 4850 }, { "epoch": 0.41, "grad_norm": 0.9283246397972107, "learning_rate": 9.60925428283507e-05, "loss": 1.2513, "step": 4860 }, { "epoch": 0.41, "grad_norm": 0.9286162853240967, "learning_rate": 9.60841451125294e-05, "loss": 1.2723, "step": 4870 }, { "epoch": 0.41, "grad_norm": 0.8313989639282227, "learning_rate": 9.60757473967081e-05, "loss": 1.2681, "step": 4880 }, { "epoch": 0.41, "grad_norm": 0.9085673093795776, "learning_rate": 9.60673496808868e-05, "loss": 1.2321, "step": 4890 }, { "epoch": 0.41, "grad_norm": 0.9357420802116394, "learning_rate": 9.60589519650655e-05, "loss": 1.2549, "step": 4900 }, { "epoch": 0.41, "grad_norm": 0.879021942615509, "learning_rate": 9.60505542492442e-05, "loss": 1.2443, "step": 4910 }, { "epoch": 0.41, "grad_norm": 0.8786951303482056, "learning_rate": 9.604215653342291e-05, "loss": 1.2433, "step": 4920 }, { "epoch": 0.41, "grad_norm": 
0.9177870154380798, "learning_rate": 9.603375881760162e-05, "loss": 1.2367, "step": 4930 }, { "epoch": 0.41, "grad_norm": 0.951474130153656, "learning_rate": 9.602536110178032e-05, "loss": 1.227, "step": 4940 }, { "epoch": 0.41, "grad_norm": 1.0126953125, "learning_rate": 9.601696338595903e-05, "loss": 1.2377, "step": 4950 }, { "epoch": 0.42, "grad_norm": 0.9180831909179688, "learning_rate": 9.600856567013773e-05, "loss": 1.2337, "step": 4960 }, { "epoch": 0.42, "grad_norm": 0.9630677700042725, "learning_rate": 9.600016795431643e-05, "loss": 1.2728, "step": 4970 }, { "epoch": 0.42, "grad_norm": 1.049486517906189, "learning_rate": 9.599177023849513e-05, "loss": 1.2572, "step": 4980 }, { "epoch": 0.42, "grad_norm": 0.9184420108795166, "learning_rate": 9.598337252267385e-05, "loss": 1.2352, "step": 4990 }, { "epoch": 0.42, "grad_norm": 0.9000878930091858, "learning_rate": 9.597497480685253e-05, "loss": 1.239, "step": 5000 }, { "epoch": 0.42, "grad_norm": 0.9162322282791138, "learning_rate": 9.596657709103123e-05, "loss": 1.2272, "step": 5010 }, { "epoch": 0.42, "grad_norm": 0.9241054654121399, "learning_rate": 9.595817937520995e-05, "loss": 1.2541, "step": 5020 }, { "epoch": 0.42, "grad_norm": 0.9283685088157654, "learning_rate": 9.594978165938865e-05, "loss": 1.2387, "step": 5030 }, { "epoch": 0.42, "grad_norm": 0.7603573203086853, "learning_rate": 9.594138394356735e-05, "loss": 1.2344, "step": 5040 }, { "epoch": 0.42, "grad_norm": 0.8442432880401611, "learning_rate": 9.593298622774606e-05, "loss": 1.2368, "step": 5050 }, { "epoch": 0.42, "grad_norm": 0.8469812273979187, "learning_rate": 9.592458851192477e-05, "loss": 1.2575, "step": 5060 }, { "epoch": 0.43, "grad_norm": 0.9024328589439392, "learning_rate": 9.591619079610347e-05, "loss": 1.2473, "step": 5070 }, { "epoch": 0.43, "grad_norm": 0.8618769645690918, "learning_rate": 9.590779308028217e-05, "loss": 1.259, "step": 5080 }, { "epoch": 0.43, "grad_norm": 0.8754567503929138, "learning_rate": 
9.589939536446088e-05, "loss": 1.2315, "step": 5090 }, { "epoch": 0.43, "grad_norm": 0.9073194861412048, "learning_rate": 9.589099764863958e-05, "loss": 1.2162, "step": 5100 }, { "epoch": 0.43, "grad_norm": 0.8669455647468567, "learning_rate": 9.588259993281827e-05, "loss": 1.2629, "step": 5110 }, { "epoch": 0.43, "grad_norm": 0.9005383849143982, "learning_rate": 9.587420221699698e-05, "loss": 1.2393, "step": 5120 }, { "epoch": 0.43, "grad_norm": 0.8723963499069214, "learning_rate": 9.586580450117568e-05, "loss": 1.2485, "step": 5130 }, { "epoch": 0.43, "grad_norm": 0.9381632208824158, "learning_rate": 9.585740678535438e-05, "loss": 1.2426, "step": 5140 }, { "epoch": 0.43, "grad_norm": 0.8653701543807983, "learning_rate": 9.584900906953309e-05, "loss": 1.2514, "step": 5150 }, { "epoch": 0.43, "grad_norm": 0.9231656789779663, "learning_rate": 9.58406113537118e-05, "loss": 1.2246, "step": 5160 }, { "epoch": 0.43, "grad_norm": 0.9562845230102539, "learning_rate": 9.58322136378905e-05, "loss": 1.2323, "step": 5170 }, { "epoch": 0.43, "grad_norm": 0.9132975935935974, "learning_rate": 9.58238159220692e-05, "loss": 1.2111, "step": 5180 }, { "epoch": 0.44, "grad_norm": 0.9352735280990601, "learning_rate": 9.581541820624791e-05, "loss": 1.2388, "step": 5190 }, { "epoch": 0.44, "grad_norm": 0.9456208348274231, "learning_rate": 9.580702049042662e-05, "loss": 1.2243, "step": 5200 }, { "epoch": 0.44, "grad_norm": 0.9143781065940857, "learning_rate": 9.57986227746053e-05, "loss": 1.2381, "step": 5210 }, { "epoch": 0.44, "grad_norm": 1.0085742473602295, "learning_rate": 9.579022505878402e-05, "loss": 1.2352, "step": 5220 }, { "epoch": 0.44, "grad_norm": 0.8309242725372314, "learning_rate": 9.578182734296272e-05, "loss": 1.2511, "step": 5230 }, { "epoch": 0.44, "grad_norm": 0.9421424269676208, "learning_rate": 9.577342962714142e-05, "loss": 1.247, "step": 5240 }, { "epoch": 0.44, "grad_norm": 0.8936107158660889, "learning_rate": 9.576503191132012e-05, "loss": 1.23, "step": 5250 }, 
{ "epoch": 0.44, "grad_norm": 0.8589064478874207, "learning_rate": 9.575663419549883e-05, "loss": 1.2556, "step": 5260 }, { "epoch": 0.44, "grad_norm": 0.9755228757858276, "learning_rate": 9.574823647967753e-05, "loss": 1.2399, "step": 5270 }, { "epoch": 0.44, "grad_norm": 0.9112702012062073, "learning_rate": 9.573983876385623e-05, "loss": 1.2396, "step": 5280 }, { "epoch": 0.44, "grad_norm": 0.964478075504303, "learning_rate": 9.573144104803495e-05, "loss": 1.2749, "step": 5290 }, { "epoch": 0.44, "grad_norm": 0.8301079869270325, "learning_rate": 9.572304333221365e-05, "loss": 1.2346, "step": 5300 }, { "epoch": 0.45, "grad_norm": 0.8552051782608032, "learning_rate": 9.571464561639235e-05, "loss": 1.2357, "step": 5310 }, { "epoch": 0.45, "grad_norm": 0.9006170630455017, "learning_rate": 9.570624790057105e-05, "loss": 1.2424, "step": 5320 }, { "epoch": 0.45, "grad_norm": 0.8143618702888489, "learning_rate": 9.569785018474975e-05, "loss": 1.2378, "step": 5330 }, { "epoch": 0.45, "grad_norm": 0.8069211840629578, "learning_rate": 9.568945246892845e-05, "loss": 1.228, "step": 5340 }, { "epoch": 0.45, "grad_norm": 0.9581374526023865, "learning_rate": 9.568105475310715e-05, "loss": 1.2525, "step": 5350 }, { "epoch": 0.45, "grad_norm": 1.02692711353302, "learning_rate": 9.567265703728587e-05, "loss": 1.2352, "step": 5360 }, { "epoch": 0.45, "grad_norm": 0.8993972539901733, "learning_rate": 9.566425932146457e-05, "loss": 1.2289, "step": 5370 }, { "epoch": 0.45, "grad_norm": 0.8615926504135132, "learning_rate": 9.565586160564327e-05, "loss": 1.2459, "step": 5380 }, { "epoch": 0.45, "grad_norm": 0.8434697985649109, "learning_rate": 9.564746388982197e-05, "loss": 1.2479, "step": 5390 }, { "epoch": 0.45, "grad_norm": 0.866491973400116, "learning_rate": 9.563906617400068e-05, "loss": 1.2404, "step": 5400 }, { "epoch": 0.45, "grad_norm": 0.8595972061157227, "learning_rate": 9.563066845817938e-05, "loss": 1.2231, "step": 5410 }, { "epoch": 0.45, "grad_norm": 0.918602466583252, 
"learning_rate": 9.562227074235807e-05, "loss": 1.2661, "step": 5420 }, { "epoch": 0.46, "grad_norm": 0.9388463497161865, "learning_rate": 9.561387302653679e-05, "loss": 1.2321, "step": 5430 }, { "epoch": 0.46, "grad_norm": 0.9377898573875427, "learning_rate": 9.560547531071549e-05, "loss": 1.2244, "step": 5440 }, { "epoch": 0.46, "grad_norm": 0.8881635069847107, "learning_rate": 9.559707759489419e-05, "loss": 1.2646, "step": 5450 }, { "epoch": 0.46, "grad_norm": 0.8622443079948425, "learning_rate": 9.55886798790729e-05, "loss": 1.2117, "step": 5460 }, { "epoch": 0.46, "grad_norm": 0.9005386829376221, "learning_rate": 9.55802821632516e-05, "loss": 1.2229, "step": 5470 }, { "epoch": 0.46, "grad_norm": 0.9114660024642944, "learning_rate": 9.55718844474303e-05, "loss": 1.2205, "step": 5480 }, { "epoch": 0.46, "grad_norm": 0.8409573435783386, "learning_rate": 9.5563486731609e-05, "loss": 1.2194, "step": 5490 }, { "epoch": 0.46, "grad_norm": 0.9288071990013123, "learning_rate": 9.555508901578772e-05, "loss": 1.2435, "step": 5500 }, { "epoch": 0.46, "grad_norm": 0.891653835773468, "learning_rate": 9.554669129996642e-05, "loss": 1.2262, "step": 5510 }, { "epoch": 0.46, "grad_norm": 0.875237762928009, "learning_rate": 9.553829358414512e-05, "loss": 1.2303, "step": 5520 }, { "epoch": 0.46, "grad_norm": 0.883129894733429, "learning_rate": 9.552989586832382e-05, "loss": 1.2264, "step": 5530 }, { "epoch": 0.46, "grad_norm": 0.8907957673072815, "learning_rate": 9.552149815250252e-05, "loss": 1.2255, "step": 5540 }, { "epoch": 0.47, "grad_norm": 0.8646265864372253, "learning_rate": 9.551310043668122e-05, "loss": 1.242, "step": 5550 }, { "epoch": 0.47, "grad_norm": 0.8700964450836182, "learning_rate": 9.550470272085994e-05, "loss": 1.2173, "step": 5560 }, { "epoch": 0.47, "grad_norm": 0.9184081554412842, "learning_rate": 9.549630500503864e-05, "loss": 1.2357, "step": 5570 }, { "epoch": 0.47, "grad_norm": 0.8421388268470764, "learning_rate": 9.548790728921734e-05, "loss": 1.2173, 
"step": 5580 }, { "epoch": 0.47, "grad_norm": 0.8952757120132446, "learning_rate": 9.547950957339604e-05, "loss": 1.2104, "step": 5590 }, { "epoch": 0.47, "grad_norm": 0.9406217336654663, "learning_rate": 9.547111185757475e-05, "loss": 1.2228, "step": 5600 }, { "epoch": 0.47, "grad_norm": 0.8921267986297607, "learning_rate": 9.546271414175345e-05, "loss": 1.2694, "step": 5610 }, { "epoch": 0.47, "grad_norm": 0.9374231696128845, "learning_rate": 9.545431642593215e-05, "loss": 1.2112, "step": 5620 }, { "epoch": 0.47, "grad_norm": 0.9043653011322021, "learning_rate": 9.544591871011085e-05, "loss": 1.2187, "step": 5630 }, { "epoch": 0.47, "grad_norm": 0.9351363182067871, "learning_rate": 9.543752099428955e-05, "loss": 1.243, "step": 5640 }, { "epoch": 0.47, "grad_norm": 0.8760493993759155, "learning_rate": 9.542912327846825e-05, "loss": 1.2347, "step": 5650 }, { "epoch": 0.47, "grad_norm": 0.8686032891273499, "learning_rate": 9.542072556264696e-05, "loss": 1.2262, "step": 5660 }, { "epoch": 0.48, "grad_norm": 0.897517204284668, "learning_rate": 9.541232784682567e-05, "loss": 1.2251, "step": 5670 }, { "epoch": 0.48, "grad_norm": 0.9855635762214661, "learning_rate": 9.540393013100437e-05, "loss": 1.2458, "step": 5680 }, { "epoch": 0.48, "grad_norm": 0.8823346495628357, "learning_rate": 9.539553241518307e-05, "loss": 1.2412, "step": 5690 }, { "epoch": 0.48, "grad_norm": 0.9292605519294739, "learning_rate": 9.538713469936179e-05, "loss": 1.2393, "step": 5700 }, { "epoch": 0.48, "grad_norm": 0.8954950571060181, "learning_rate": 9.537873698354049e-05, "loss": 1.212, "step": 5710 }, { "epoch": 0.48, "grad_norm": 0.8325024843215942, "learning_rate": 9.537033926771919e-05, "loss": 1.2385, "step": 5720 }, { "epoch": 0.48, "grad_norm": 1.3778843879699707, "learning_rate": 9.536194155189789e-05, "loss": 1.2217, "step": 5730 }, { "epoch": 0.48, "grad_norm": 1.133502721786499, "learning_rate": 9.535354383607659e-05, "loss": 1.2193, "step": 5740 }, { "epoch": 0.48, "grad_norm": 
0.8802908062934875, "learning_rate": 9.534514612025529e-05, "loss": 1.2356, "step": 5750 }, { "epoch": 0.48, "grad_norm": 0.9083417057991028, "learning_rate": 9.533674840443399e-05, "loss": 1.2235, "step": 5760 }, { "epoch": 0.48, "grad_norm": 1.0076510906219482, "learning_rate": 9.53283506886127e-05, "loss": 1.2317, "step": 5770 }, { "epoch": 0.48, "grad_norm": 0.8874029517173767, "learning_rate": 9.53199529727914e-05, "loss": 1.2335, "step": 5780 }, { "epoch": 0.49, "grad_norm": 0.912630558013916, "learning_rate": 9.53115552569701e-05, "loss": 1.226, "step": 5790 }, { "epoch": 0.49, "grad_norm": 0.9173579216003418, "learning_rate": 9.530315754114882e-05, "loss": 1.2371, "step": 5800 }, { "epoch": 0.49, "grad_norm": 0.9296732544898987, "learning_rate": 9.529475982532752e-05, "loss": 1.2151, "step": 5810 }, { "epoch": 0.49, "grad_norm": 0.885964572429657, "learning_rate": 9.528636210950622e-05, "loss": 1.2064, "step": 5820 }, { "epoch": 0.49, "grad_norm": 0.880092203617096, "learning_rate": 9.527796439368492e-05, "loss": 1.1906, "step": 5830 }, { "epoch": 0.49, "grad_norm": 0.9691383242607117, "learning_rate": 9.526956667786364e-05, "loss": 1.2388, "step": 5840 }, { "epoch": 0.49, "grad_norm": 0.8995313048362732, "learning_rate": 9.526116896204232e-05, "loss": 1.2216, "step": 5850 }, { "epoch": 0.49, "grad_norm": 0.9122263193130493, "learning_rate": 9.525277124622102e-05, "loss": 1.209, "step": 5860 }, { "epoch": 0.49, "grad_norm": 0.9010337591171265, "learning_rate": 9.524437353039974e-05, "loss": 1.2196, "step": 5870 }, { "epoch": 0.49, "grad_norm": 0.842031717300415, "learning_rate": 9.523597581457844e-05, "loss": 1.2319, "step": 5880 }, { "epoch": 0.49, "grad_norm": 0.9067339897155762, "learning_rate": 9.522757809875714e-05, "loss": 1.2401, "step": 5890 }, { "epoch": 0.49, "grad_norm": 0.8489568829536438, "learning_rate": 9.521918038293584e-05, "loss": 1.2698, "step": 5900 }, { "epoch": 0.5, "grad_norm": 0.9444595575332642, "learning_rate": 
9.521078266711455e-05, "loss": 1.2197, "step": 5910 }, { "epoch": 0.5, "grad_norm": 0.8654924631118774, "learning_rate": 9.520238495129325e-05, "loss": 1.2156, "step": 5920 }, { "epoch": 0.5, "grad_norm": 0.9583361744880676, "learning_rate": 9.519398723547196e-05, "loss": 1.2075, "step": 5930 }, { "epoch": 0.5, "grad_norm": 0.9642980694770813, "learning_rate": 9.518558951965067e-05, "loss": 1.2226, "step": 5940 }, { "epoch": 0.5, "grad_norm": 0.9107359647750854, "learning_rate": 9.517719180382936e-05, "loss": 1.2078, "step": 5950 }, { "epoch": 0.5, "grad_norm": 0.8962581157684326, "learning_rate": 9.516879408800806e-05, "loss": 1.2236, "step": 5960 }, { "epoch": 0.5, "grad_norm": 0.8890233635902405, "learning_rate": 9.516039637218677e-05, "loss": 1.2338, "step": 5970 }, { "epoch": 0.5, "grad_norm": 0.8808488249778748, "learning_rate": 9.515199865636547e-05, "loss": 1.2019, "step": 5980 }, { "epoch": 0.5, "grad_norm": 0.849258303642273, "learning_rate": 9.514360094054417e-05, "loss": 1.2294, "step": 5990 }, { "epoch": 0.5, "grad_norm": 0.9754767417907715, "learning_rate": 9.513520322472287e-05, "loss": 1.2507, "step": 6000 }, { "epoch": 0.5, "grad_norm": 0.9048311114311218, "learning_rate": 9.512680550890159e-05, "loss": 1.2346, "step": 6010 }, { "epoch": 0.5, "grad_norm": 0.8853904008865356, "learning_rate": 9.511840779308029e-05, "loss": 1.2564, "step": 6020 }, { "epoch": 0.51, "grad_norm": 0.9003530740737915, "learning_rate": 9.511001007725899e-05, "loss": 1.2243, "step": 6030 }, { "epoch": 0.51, "grad_norm": 0.9003586769104004, "learning_rate": 9.51016123614377e-05, "loss": 1.2255, "step": 6040 }, { "epoch": 0.51, "grad_norm": 0.9166625142097473, "learning_rate": 9.50932146456164e-05, "loss": 1.1941, "step": 6050 }, { "epoch": 0.51, "grad_norm": 1.0153764486312866, "learning_rate": 9.508481692979509e-05, "loss": 1.2123, "step": 6060 }, { "epoch": 0.51, "grad_norm": 0.9264450669288635, "learning_rate": 9.50764192139738e-05, "loss": 1.2143, "step": 6070 }, { 
"epoch": 0.51, "grad_norm": 0.9054614901542664, "learning_rate": 9.50680214981525e-05, "loss": 1.2263, "step": 6080 }, { "epoch": 0.51, "grad_norm": 0.8641769289970398, "learning_rate": 9.505962378233121e-05, "loss": 1.248, "step": 6090 }, { "epoch": 0.51, "grad_norm": 0.8716381788253784, "learning_rate": 9.505206583809204e-05, "loss": 1.2096, "step": 6100 }, { "epoch": 0.51, "grad_norm": 0.9181687235832214, "learning_rate": 9.504366812227074e-05, "loss": 1.2317, "step": 6110 }, { "epoch": 0.51, "grad_norm": 0.9484494924545288, "learning_rate": 9.503527040644946e-05, "loss": 1.2132, "step": 6120 }, { "epoch": 0.51, "grad_norm": 0.9860294461250305, "learning_rate": 9.502687269062816e-05, "loss": 1.2321, "step": 6130 }, { "epoch": 0.51, "grad_norm": 0.9444969892501831, "learning_rate": 9.501847497480686e-05, "loss": 1.2285, "step": 6140 }, { "epoch": 0.52, "grad_norm": 0.9693177938461304, "learning_rate": 9.501007725898556e-05, "loss": 1.2148, "step": 6150 }, { "epoch": 0.52, "grad_norm": 1.2146936655044556, "learning_rate": 9.500167954316427e-05, "loss": 1.2278, "step": 6160 }, { "epoch": 0.52, "grad_norm": 0.9109039902687073, "learning_rate": 9.499328182734297e-05, "loss": 1.2481, "step": 6170 }, { "epoch": 0.52, "grad_norm": 0.9141036868095398, "learning_rate": 9.498488411152166e-05, "loss": 1.1948, "step": 6180 }, { "epoch": 0.52, "grad_norm": 0.9329163432121277, "learning_rate": 9.497648639570038e-05, "loss": 1.202, "step": 6190 }, { "epoch": 0.52, "grad_norm": 0.9124045968055725, "learning_rate": 9.496808867987908e-05, "loss": 1.2195, "step": 6200 }, { "epoch": 0.52, "grad_norm": 0.8825882077217102, "learning_rate": 9.495969096405778e-05, "loss": 1.2147, "step": 6210 }, { "epoch": 0.52, "grad_norm": 0.9746090173721313, "learning_rate": 9.495129324823648e-05, "loss": 1.213, "step": 6220 }, { "epoch": 0.52, "grad_norm": 0.850538432598114, "learning_rate": 9.494289553241519e-05, "loss": 1.2326, "step": 6230 }, { "epoch": 0.52, "grad_norm": 0.9482828974723816, 
"learning_rate": 9.493449781659389e-05, "loss": 1.2033, "step": 6240 }, { "epoch": 0.52, "grad_norm": 0.9356690645217896, "learning_rate": 9.492610010077259e-05, "loss": 1.224, "step": 6250 }, { "epoch": 0.52, "grad_norm": 0.794873833656311, "learning_rate": 9.491770238495131e-05, "loss": 1.2311, "step": 6260 }, { "epoch": 0.53, "grad_norm": 0.9129030704498291, "learning_rate": 9.490930466913001e-05, "loss": 1.2166, "step": 6270 }, { "epoch": 0.53, "grad_norm": 0.8686519861221313, "learning_rate": 9.49009069533087e-05, "loss": 1.2195, "step": 6280 }, { "epoch": 0.53, "grad_norm": 0.9123719334602356, "learning_rate": 9.489250923748741e-05, "loss": 1.2023, "step": 6290 }, { "epoch": 0.53, "grad_norm": 1.3565350770950317, "learning_rate": 9.488411152166611e-05, "loss": 1.2282, "step": 6300 }, { "epoch": 0.53, "grad_norm": 0.8791282773017883, "learning_rate": 9.487571380584481e-05, "loss": 1.2231, "step": 6310 }, { "epoch": 0.53, "grad_norm": 0.8691518902778625, "learning_rate": 9.486731609002351e-05, "loss": 1.221, "step": 6320 }, { "epoch": 0.53, "grad_norm": 0.9510216116905212, "learning_rate": 9.485891837420223e-05, "loss": 1.2005, "step": 6330 }, { "epoch": 0.53, "grad_norm": 0.8951911926269531, "learning_rate": 9.485052065838093e-05, "loss": 1.1917, "step": 6340 }, { "epoch": 0.53, "grad_norm": 0.9834571480751038, "learning_rate": 9.484212294255963e-05, "loss": 1.203, "step": 6350 }, { "epoch": 0.53, "grad_norm": 0.8950636386871338, "learning_rate": 9.483372522673834e-05, "loss": 1.202, "step": 6360 }, { "epoch": 0.53, "grad_norm": 0.9080728888511658, "learning_rate": 9.482532751091704e-05, "loss": 1.2099, "step": 6370 }, { "epoch": 0.53, "grad_norm": 0.8755112290382385, "learning_rate": 9.481692979509574e-05, "loss": 1.2189, "step": 6380 }, { "epoch": 0.54, "grad_norm": 0.8602133989334106, "learning_rate": 9.480853207927444e-05, "loss": 1.2055, "step": 6390 }, { "epoch": 0.54, "grad_norm": 1.0315263271331787, "learning_rate": 9.480013436345314e-05, "loss": 
1.2011, "step": 6400 }, { "epoch": 0.54, "grad_norm": 0.82036954164505, "learning_rate": 9.479173664763184e-05, "loss": 1.1974, "step": 6410 }, { "epoch": 0.54, "grad_norm": 1.011440396308899, "learning_rate": 9.478333893181055e-05, "loss": 1.2138, "step": 6420 }, { "epoch": 0.54, "grad_norm": 0.9471117258071899, "learning_rate": 9.477494121598926e-05, "loss": 1.2089, "step": 6430 }, { "epoch": 0.54, "grad_norm": 0.8859214186668396, "learning_rate": 9.476654350016796e-05, "loss": 1.2032, "step": 6440 }, { "epoch": 0.54, "grad_norm": 0.9290270209312439, "learning_rate": 9.475814578434666e-05, "loss": 1.22, "step": 6450 }, { "epoch": 0.54, "grad_norm": 0.8193401098251343, "learning_rate": 9.474974806852536e-05, "loss": 1.2009, "step": 6460 }, { "epoch": 0.54, "grad_norm": 0.91757732629776, "learning_rate": 9.474135035270408e-05, "loss": 1.2186, "step": 6470 }, { "epoch": 0.54, "grad_norm": 0.834231436252594, "learning_rate": 9.473295263688278e-05, "loss": 1.2395, "step": 6480 }, { "epoch": 0.54, "grad_norm": 0.912388801574707, "learning_rate": 9.472455492106148e-05, "loss": 1.211, "step": 6490 }, { "epoch": 0.54, "grad_norm": 0.8885015249252319, "learning_rate": 9.471615720524018e-05, "loss": 1.214, "step": 6500 }, { "epoch": 0.55, "grad_norm": 0.9009397625923157, "learning_rate": 9.470775948941888e-05, "loss": 1.2304, "step": 6510 }, { "epoch": 0.55, "grad_norm": 0.906132161617279, "learning_rate": 9.469936177359758e-05, "loss": 1.2052, "step": 6520 }, { "epoch": 0.55, "grad_norm": 0.8399680256843567, "learning_rate": 9.46909640577763e-05, "loss": 1.205, "step": 6530 }, { "epoch": 0.55, "grad_norm": 0.8931518197059631, "learning_rate": 9.4682566341955e-05, "loss": 1.22, "step": 6540 }, { "epoch": 0.55, "grad_norm": 0.8453435897827148, "learning_rate": 9.46741686261337e-05, "loss": 1.2252, "step": 6550 }, { "epoch": 0.55, "grad_norm": 0.885593831539154, "learning_rate": 9.46657709103124e-05, "loss": 1.2406, "step": 6560 }, { "epoch": 0.55, "grad_norm": 
0.9239007830619812, "learning_rate": 9.465737319449111e-05, "loss": 1.2295, "step": 6570 }, { "epoch": 0.55, "grad_norm": 0.9348042607307434, "learning_rate": 9.464897547866981e-05, "loss": 1.1996, "step": 6580 }, { "epoch": 0.55, "grad_norm": 0.8948506116867065, "learning_rate": 9.464057776284851e-05, "loss": 1.193, "step": 6590 }, { "epoch": 0.55, "grad_norm": 0.8650892376899719, "learning_rate": 9.463218004702721e-05, "loss": 1.2212, "step": 6600 }, { "epoch": 0.55, "grad_norm": 0.8444058895111084, "learning_rate": 9.462378233120591e-05, "loss": 1.2107, "step": 6610 }, { "epoch": 0.55, "grad_norm": 0.9454925060272217, "learning_rate": 9.461538461538461e-05, "loss": 1.2147, "step": 6620 }, { "epoch": 0.56, "grad_norm": 0.9183284044265747, "learning_rate": 9.460698689956333e-05, "loss": 1.2043, "step": 6630 }, { "epoch": 0.56, "grad_norm": 0.900619626045227, "learning_rate": 9.459858918374203e-05, "loss": 1.2367, "step": 6640 }, { "epoch": 0.56, "grad_norm": 0.8767586946487427, "learning_rate": 9.459019146792073e-05, "loss": 1.1921, "step": 6650 }, { "epoch": 0.56, "grad_norm": 0.9866166710853577, "learning_rate": 9.458179375209943e-05, "loss": 1.2297, "step": 6660 }, { "epoch": 0.56, "grad_norm": 0.9303044676780701, "learning_rate": 9.457339603627814e-05, "loss": 1.2016, "step": 6670 }, { "epoch": 0.56, "grad_norm": 0.9829623699188232, "learning_rate": 9.456499832045684e-05, "loss": 1.2117, "step": 6680 }, { "epoch": 0.56, "grad_norm": 0.8491721153259277, "learning_rate": 9.455660060463555e-05, "loss": 1.2002, "step": 6690 }, { "epoch": 0.56, "grad_norm": 0.9353068470954895, "learning_rate": 9.454820288881425e-05, "loss": 1.1971, "step": 6700 }, { "epoch": 0.56, "grad_norm": 0.8836992383003235, "learning_rate": 9.453980517299295e-05, "loss": 1.2072, "step": 6710 }, { "epoch": 0.56, "grad_norm": 0.9184627532958984, "learning_rate": 9.453140745717165e-05, "loss": 1.2088, "step": 6720 }, { "epoch": 0.56, "grad_norm": 0.9549211859703064, "learning_rate": 
9.452300974135035e-05, "loss": 1.2093, "step": 6730 }, { "epoch": 0.57, "grad_norm": 0.8630688190460205, "learning_rate": 9.451461202552906e-05, "loss": 1.2306, "step": 6740 }, { "epoch": 0.57, "grad_norm": 0.8126599192619324, "learning_rate": 9.450621430970776e-05, "loss": 1.2013, "step": 6750 }, { "epoch": 0.57, "grad_norm": 0.930507242679596, "learning_rate": 9.449781659388646e-05, "loss": 1.1965, "step": 6760 }, { "epoch": 0.57, "grad_norm": 0.9245671033859253, "learning_rate": 9.448941887806518e-05, "loss": 1.2054, "step": 6770 }, { "epoch": 0.57, "grad_norm": 0.9427335858345032, "learning_rate": 9.448102116224388e-05, "loss": 1.1885, "step": 6780 }, { "epoch": 0.57, "grad_norm": 0.8826670050621033, "learning_rate": 9.447262344642258e-05, "loss": 1.196, "step": 6790 }, { "epoch": 0.57, "grad_norm": 0.8583161234855652, "learning_rate": 9.446422573060128e-05, "loss": 1.2018, "step": 6800 }, { "epoch": 0.57, "grad_norm": 0.8399899005889893, "learning_rate": 9.445582801477998e-05, "loss": 1.1759, "step": 6810 }, { "epoch": 0.57, "grad_norm": 0.9219769835472107, "learning_rate": 9.444743029895868e-05, "loss": 1.1996, "step": 6820 }, { "epoch": 0.57, "grad_norm": 0.8673033118247986, "learning_rate": 9.443903258313738e-05, "loss": 1.2203, "step": 6830 }, { "epoch": 0.57, "grad_norm": 0.8609717488288879, "learning_rate": 9.44306348673161e-05, "loss": 1.2273, "step": 6840 }, { "epoch": 0.57, "grad_norm": 0.8942599892616272, "learning_rate": 9.44222371514948e-05, "loss": 1.2236, "step": 6850 }, { "epoch": 0.58, "grad_norm": 0.8240846395492554, "learning_rate": 9.44138394356735e-05, "loss": 1.215, "step": 6860 }, { "epoch": 0.58, "grad_norm": 0.8023459911346436, "learning_rate": 9.440544171985221e-05, "loss": 1.2105, "step": 6870 }, { "epoch": 0.58, "grad_norm": 0.9448633193969727, "learning_rate": 9.439704400403091e-05, "loss": 1.1919, "step": 6880 }, { "epoch": 0.58, "grad_norm": 0.8635557293891907, "learning_rate": 9.438864628820961e-05, "loss": 1.1764, "step": 6890 
}, { "epoch": 0.58, "grad_norm": 0.8735735416412354, "learning_rate": 9.438024857238831e-05, "loss": 1.225, "step": 6900 }, { "epoch": 0.58, "grad_norm": 0.8482799530029297, "learning_rate": 9.437185085656703e-05, "loss": 1.2107, "step": 6910 }, { "epoch": 0.58, "grad_norm": 0.9306829571723938, "learning_rate": 9.436345314074572e-05, "loss": 1.2181, "step": 6920 }, { "epoch": 0.58, "grad_norm": 0.88686603307724, "learning_rate": 9.435505542492442e-05, "loss": 1.1963, "step": 6930 }, { "epoch": 0.58, "grad_norm": 0.8896923065185547, "learning_rate": 9.434665770910313e-05, "loss": 1.1999, "step": 6940 }, { "epoch": 0.58, "grad_norm": 0.7939994931221008, "learning_rate": 9.433825999328183e-05, "loss": 1.1835, "step": 6950 }, { "epoch": 0.58, "grad_norm": 0.9401127099990845, "learning_rate": 9.432986227746053e-05, "loss": 1.1855, "step": 6960 }, { "epoch": 0.58, "grad_norm": 0.9034574031829834, "learning_rate": 9.432146456163923e-05, "loss": 1.1748, "step": 6970 }, { "epoch": 0.59, "grad_norm": 0.8599429726600647, "learning_rate": 9.431306684581795e-05, "loss": 1.1934, "step": 6980 }, { "epoch": 0.59, "grad_norm": 0.9708506464958191, "learning_rate": 9.430466912999665e-05, "loss": 1.1799, "step": 6990 }, { "epoch": 0.59, "grad_norm": 0.9697883129119873, "learning_rate": 9.429627141417535e-05, "loss": 1.2021, "step": 7000 }, { "epoch": 0.59, "grad_norm": 0.8883392214775085, "learning_rate": 9.428787369835406e-05, "loss": 1.1806, "step": 7010 }, { "epoch": 0.59, "grad_norm": 0.9711785912513733, "learning_rate": 9.427947598253276e-05, "loss": 1.2261, "step": 7020 }, { "epoch": 0.59, "grad_norm": 0.9639722108840942, "learning_rate": 9.427107826671145e-05, "loss": 1.193, "step": 7030 }, { "epoch": 0.59, "grad_norm": 0.9306802749633789, "learning_rate": 9.426268055089016e-05, "loss": 1.2203, "step": 7040 }, { "epoch": 0.59, "grad_norm": 0.8337755799293518, "learning_rate": 9.425428283506886e-05, "loss": 1.2426, "step": 7050 }, { "epoch": 0.59, "grad_norm": 0.83501797914505, 
"learning_rate": 9.424588511924757e-05, "loss": 1.2126, "step": 7060 }, { "epoch": 0.59, "grad_norm": 0.9699921011924744, "learning_rate": 9.423748740342627e-05, "loss": 1.2122, "step": 7070 }, { "epoch": 0.59, "grad_norm": 0.9536002278327942, "learning_rate": 9.422908968760498e-05, "loss": 1.1994, "step": 7080 }, { "epoch": 0.59, "grad_norm": 0.9766404032707214, "learning_rate": 9.422069197178368e-05, "loss": 1.2346, "step": 7090 }, { "epoch": 0.6, "grad_norm": 0.8767348527908325, "learning_rate": 9.421229425596238e-05, "loss": 1.1839, "step": 7100 }, { "epoch": 0.6, "grad_norm": 0.923218309879303, "learning_rate": 9.42038965401411e-05, "loss": 1.1978, "step": 7110 }, { "epoch": 0.6, "grad_norm": 0.9360674619674683, "learning_rate": 9.41954988243198e-05, "loss": 1.2313, "step": 7120 }, { "epoch": 0.6, "grad_norm": 0.9123576283454895, "learning_rate": 9.418710110849848e-05, "loss": 1.1958, "step": 7130 }, { "epoch": 0.6, "grad_norm": 0.8900172114372253, "learning_rate": 9.41787033926772e-05, "loss": 1.1824, "step": 7140 }, { "epoch": 0.6, "grad_norm": 0.9195378422737122, "learning_rate": 9.41703056768559e-05, "loss": 1.2176, "step": 7150 }, { "epoch": 0.6, "grad_norm": 0.8248353600502014, "learning_rate": 9.41619079610346e-05, "loss": 1.2058, "step": 7160 }, { "epoch": 0.6, "grad_norm": 1.137145757675171, "learning_rate": 9.41535102452133e-05, "loss": 1.2013, "step": 7170 }, { "epoch": 0.6, "grad_norm": 0.9338940382003784, "learning_rate": 9.414511252939201e-05, "loss": 1.2069, "step": 7180 }, { "epoch": 0.6, "grad_norm": 0.9436649680137634, "learning_rate": 9.413671481357072e-05, "loss": 1.2138, "step": 7190 }, { "epoch": 0.6, "grad_norm": 0.867423951625824, "learning_rate": 9.412831709774942e-05, "loss": 1.2204, "step": 7200 }, { "epoch": 0.6, "grad_norm": 0.8983123898506165, "learning_rate": 9.411991938192812e-05, "loss": 1.1771, "step": 7210 }, { "epoch": 0.61, "grad_norm": 0.9414335489273071, "learning_rate": 9.411152166610683e-05, "loss": 1.1896, "step": 7220 
}, { "epoch": 0.61, "grad_norm": 0.8820247650146484, "learning_rate": 9.410312395028553e-05, "loss": 1.1988, "step": 7230 }, { "epoch": 0.61, "grad_norm": 0.9329501986503601, "learning_rate": 9.409472623446423e-05, "loss": 1.1778, "step": 7240 }, { "epoch": 0.61, "grad_norm": 0.8825410008430481, "learning_rate": 9.408632851864293e-05, "loss": 1.2103, "step": 7250 }, { "epoch": 0.61, "grad_norm": 0.8636690974235535, "learning_rate": 9.407793080282163e-05, "loss": 1.2002, "step": 7260 }, { "epoch": 0.61, "grad_norm": 0.8834033012390137, "learning_rate": 9.406953308700033e-05, "loss": 1.1957, "step": 7270 }, { "epoch": 0.61, "grad_norm": 0.9180720448493958, "learning_rate": 9.406113537117905e-05, "loss": 1.2097, "step": 7280 }, { "epoch": 0.61, "grad_norm": 0.8845052719116211, "learning_rate": 9.405273765535775e-05, "loss": 1.174, "step": 7290 }, { "epoch": 0.61, "grad_norm": 0.934151828289032, "learning_rate": 9.404433993953645e-05, "loss": 1.2053, "step": 7300 }, { "epoch": 0.61, "grad_norm": 0.9797338843345642, "learning_rate": 9.403594222371515e-05, "loss": 1.1866, "step": 7310 }, { "epoch": 0.61, "grad_norm": 0.8769814968109131, "learning_rate": 9.402754450789386e-05, "loss": 1.1849, "step": 7320 }, { "epoch": 0.61, "grad_norm": 0.9255028963088989, "learning_rate": 9.401914679207257e-05, "loss": 1.1953, "step": 7330 }, { "epoch": 0.62, "grad_norm": 0.8942875862121582, "learning_rate": 9.401074907625125e-05, "loss": 1.191, "step": 7340 }, { "epoch": 0.62, "grad_norm": 0.9048584699630737, "learning_rate": 9.400235136042997e-05, "loss": 1.1908, "step": 7350 }, { "epoch": 0.62, "grad_norm": 0.8529855012893677, "learning_rate": 9.399395364460867e-05, "loss": 1.1891, "step": 7360 }, { "epoch": 0.62, "grad_norm": 0.8380342125892639, "learning_rate": 9.398555592878737e-05, "loss": 1.1909, "step": 7370 }, { "epoch": 0.62, "grad_norm": 0.9068658351898193, "learning_rate": 9.397715821296608e-05, "loss": 1.1929, "step": 7380 }, { "epoch": 0.62, "grad_norm": 
0.9368525147438049, "learning_rate": 9.396876049714478e-05, "loss": 1.202, "step": 7390 }, { "epoch": 0.62, "grad_norm": 0.8885350227355957, "learning_rate": 9.396036278132348e-05, "loss": 1.2079, "step": 7400 }, { "epoch": 0.62, "grad_norm": 0.9168792963027954, "learning_rate": 9.395196506550218e-05, "loss": 1.1953, "step": 7410 }, { "epoch": 0.62, "grad_norm": 0.8806029558181763, "learning_rate": 9.39435673496809e-05, "loss": 1.1994, "step": 7420 }, { "epoch": 0.62, "grad_norm": 0.9570105075836182, "learning_rate": 9.39351696338596e-05, "loss": 1.1827, "step": 7430 }, { "epoch": 0.62, "grad_norm": 0.845789909362793, "learning_rate": 9.39267719180383e-05, "loss": 1.1948, "step": 7440 }, { "epoch": 0.62, "grad_norm": 0.9345555901527405, "learning_rate": 9.3918374202217e-05, "loss": 1.207, "step": 7450 }, { "epoch": 0.63, "grad_norm": 0.809235692024231, "learning_rate": 9.39099764863957e-05, "loss": 1.2255, "step": 7460 }, { "epoch": 0.63, "grad_norm": 0.9185549020767212, "learning_rate": 9.39015787705744e-05, "loss": 1.2116, "step": 7470 }, { "epoch": 0.63, "grad_norm": 0.9820988178253174, "learning_rate": 9.389318105475312e-05, "loss": 1.2023, "step": 7480 }, { "epoch": 0.63, "grad_norm": 0.8993212580680847, "learning_rate": 9.388478333893182e-05, "loss": 1.1936, "step": 7490 }, { "epoch": 0.63, "grad_norm": 0.8562648892402649, "learning_rate": 9.387638562311052e-05, "loss": 1.1738, "step": 7500 }, { "epoch": 0.63, "grad_norm": 0.9744245409965515, "learning_rate": 9.386798790728922e-05, "loss": 1.1874, "step": 7510 }, { "epoch": 0.63, "grad_norm": 0.8549754619598389, "learning_rate": 9.385959019146793e-05, "loss": 1.2091, "step": 7520 }, { "epoch": 0.63, "grad_norm": 0.9932068586349487, "learning_rate": 9.385119247564663e-05, "loss": 1.1886, "step": 7530 }, { "epoch": 0.63, "grad_norm": 0.9793296456336975, "learning_rate": 9.384279475982533e-05, "loss": 1.2022, "step": 7540 }, { "epoch": 0.63, "grad_norm": 0.9506256580352783, "learning_rate": 
9.383439704400403e-05, "loss": 1.1937, "step": 7550 }, { "epoch": 0.63, "grad_norm": 0.8991612792015076, "learning_rate": 9.382599932818274e-05, "loss": 1.1652, "step": 7560 }, { "epoch": 0.63, "grad_norm": 0.9225620627403259, "learning_rate": 9.381760161236144e-05, "loss": 1.1848, "step": 7570 }, { "epoch": 0.64, "grad_norm": 0.8967307209968567, "learning_rate": 9.380920389654014e-05, "loss": 1.2174, "step": 7580 }, { "epoch": 0.64, "grad_norm": 0.936236560344696, "learning_rate": 9.380080618071885e-05, "loss": 1.1914, "step": 7590 }, { "epoch": 0.64, "grad_norm": 0.8682725429534912, "learning_rate": 9.379240846489755e-05, "loss": 1.2043, "step": 7600 }, { "epoch": 0.64, "grad_norm": 0.9163506031036377, "learning_rate": 9.378401074907625e-05, "loss": 1.1937, "step": 7610 }, { "epoch": 0.64, "grad_norm": 0.9057551622390747, "learning_rate": 9.377561303325497e-05, "loss": 1.2134, "step": 7620 }, { "epoch": 0.64, "grad_norm": 0.8398305773735046, "learning_rate": 9.376721531743367e-05, "loss": 1.1748, "step": 7630 }, { "epoch": 0.64, "grad_norm": 0.9422807097434998, "learning_rate": 9.375881760161237e-05, "loss": 1.1899, "step": 7640 }, { "epoch": 0.64, "grad_norm": 0.9004360437393188, "learning_rate": 9.375041988579107e-05, "loss": 1.171, "step": 7650 }, { "epoch": 0.64, "grad_norm": 1.0320866107940674, "learning_rate": 9.374202216996977e-05, "loss": 1.1797, "step": 7660 }, { "epoch": 0.64, "grad_norm": 0.8690953850746155, "learning_rate": 9.373362445414847e-05, "loss": 1.1685, "step": 7670 }, { "epoch": 0.64, "grad_norm": 0.808520495891571, "learning_rate": 9.372522673832717e-05, "loss": 1.1722, "step": 7680 }, { "epoch": 0.64, "grad_norm": 0.9039348363876343, "learning_rate": 9.371682902250589e-05, "loss": 1.2031, "step": 7690 }, { "epoch": 0.65, "grad_norm": 0.9256361126899719, "learning_rate": 9.370843130668459e-05, "loss": 1.1916, "step": 7700 }, { "epoch": 0.65, "grad_norm": 0.9331594705581665, "learning_rate": 9.370003359086329e-05, "loss": 1.1875, "step": 
7710 }, { "epoch": 0.65, "grad_norm": 1.0034734010696411, "learning_rate": 9.3691635875042e-05, "loss": 1.1893, "step": 7720 }, { "epoch": 0.65, "grad_norm": 0.9952471852302551, "learning_rate": 9.36832381592207e-05, "loss": 1.1856, "step": 7730 }, { "epoch": 0.65, "grad_norm": 0.8371817469596863, "learning_rate": 9.36748404433994e-05, "loss": 1.2, "step": 7740 }, { "epoch": 0.65, "grad_norm": 0.8598681092262268, "learning_rate": 9.36664427275781e-05, "loss": 1.201, "step": 7750 }, { "epoch": 0.65, "grad_norm": 0.8538065552711487, "learning_rate": 9.365804501175682e-05, "loss": 1.1881, "step": 7760 }, { "epoch": 0.65, "grad_norm": 0.8651682138442993, "learning_rate": 9.36496472959355e-05, "loss": 1.1929, "step": 7770 }, { "epoch": 0.65, "grad_norm": 0.9020833969116211, "learning_rate": 9.36412495801142e-05, "loss": 1.1998, "step": 7780 }, { "epoch": 0.65, "grad_norm": 0.8552895188331604, "learning_rate": 9.363285186429292e-05, "loss": 1.188, "step": 7790 }, { "epoch": 0.65, "grad_norm": 0.929107666015625, "learning_rate": 9.362445414847162e-05, "loss": 1.2101, "step": 7800 }, { "epoch": 0.65, "grad_norm": 0.8756254315376282, "learning_rate": 9.361605643265032e-05, "loss": 1.1829, "step": 7810 }, { "epoch": 0.66, "grad_norm": 0.8464421033859253, "learning_rate": 9.360765871682902e-05, "loss": 1.1875, "step": 7820 }, { "epoch": 0.66, "grad_norm": 0.9108934998512268, "learning_rate": 9.359926100100774e-05, "loss": 1.193, "step": 7830 }, { "epoch": 0.66, "grad_norm": 0.8196065425872803, "learning_rate": 9.359086328518644e-05, "loss": 1.1815, "step": 7840 }, { "epoch": 0.66, "grad_norm": 0.9362756013870239, "learning_rate": 9.358246556936514e-05, "loss": 1.19, "step": 7850 }, { "epoch": 0.66, "grad_norm": 1.0048913955688477, "learning_rate": 9.357406785354385e-05, "loss": 1.1869, "step": 7860 }, { "epoch": 0.66, "grad_norm": 0.8194365501403809, "learning_rate": 9.356567013772254e-05, "loss": 1.1872, "step": 7870 }, { "epoch": 0.66, "grad_norm": 0.9167453050613403, 
"learning_rate": 9.355727242190124e-05, "loss": 1.2053, "step": 7880 }, { "epoch": 0.66, "grad_norm": 1.0007411241531372, "learning_rate": 9.354887470607995e-05, "loss": 1.2083, "step": 7890 }, { "epoch": 0.66, "grad_norm": 0.9240213632583618, "learning_rate": 9.354047699025865e-05, "loss": 1.1961, "step": 7900 }, { "epoch": 0.66, "grad_norm": 0.8382523059844971, "learning_rate": 9.353207927443735e-05, "loss": 1.2134, "step": 7910 }, { "epoch": 0.66, "grad_norm": 0.9030473232269287, "learning_rate": 9.352368155861606e-05, "loss": 1.1787, "step": 7920 }, { "epoch": 0.66, "grad_norm": 0.9283421039581299, "learning_rate": 9.351528384279477e-05, "loss": 1.1609, "step": 7930 }, { "epoch": 0.67, "grad_norm": 0.9871938228607178, "learning_rate": 9.350688612697347e-05, "loss": 1.2078, "step": 7940 }, { "epoch": 0.67, "grad_norm": 0.8591379523277283, "learning_rate": 9.349848841115217e-05, "loss": 1.1909, "step": 7950 }, { "epoch": 0.67, "grad_norm": 0.8470222353935242, "learning_rate": 9.349009069533089e-05, "loss": 1.2113, "step": 7960 }, { "epoch": 0.67, "grad_norm": 0.8578379154205322, "learning_rate": 9.348169297950959e-05, "loss": 1.1959, "step": 7970 }, { "epoch": 0.67, "grad_norm": 0.8939791917800903, "learning_rate": 9.347329526368827e-05, "loss": 1.1721, "step": 7980 }, { "epoch": 0.67, "grad_norm": 0.8363282084465027, "learning_rate": 9.346489754786699e-05, "loss": 1.1505, "step": 7990 }, { "epoch": 0.67, "grad_norm": 0.9319391846656799, "learning_rate": 9.345649983204569e-05, "loss": 1.183, "step": 8000 }, { "epoch": 0.67, "grad_norm": 0.8514727354049683, "learning_rate": 9.344810211622439e-05, "loss": 1.1785, "step": 8010 }, { "epoch": 0.67, "grad_norm": 0.8914164304733276, "learning_rate": 9.343970440040309e-05, "loss": 1.1893, "step": 8020 }, { "epoch": 0.67, "grad_norm": 0.9012137055397034, "learning_rate": 9.34313066845818e-05, "loss": 1.1586, "step": 8030 }, { "epoch": 0.67, "grad_norm": 0.892142653465271, "learning_rate": 9.34229089687605e-05, "loss": 
1.1858, "step": 8040 }, { "epoch": 0.67, "grad_norm": 0.886681079864502, "learning_rate": 9.34145112529392e-05, "loss": 1.1945, "step": 8050 }, { "epoch": 0.68, "grad_norm": 0.8880460262298584, "learning_rate": 9.34061135371179e-05, "loss": 1.186, "step": 8060 }, { "epoch": 0.68, "grad_norm": 0.9022466540336609, "learning_rate": 9.339771582129662e-05, "loss": 1.1829, "step": 8070 }, { "epoch": 0.68, "grad_norm": 0.9207655787467957, "learning_rate": 9.338931810547531e-05, "loss": 1.1982, "step": 8080 }, { "epoch": 0.68, "grad_norm": 0.8897063136100769, "learning_rate": 9.338092038965401e-05, "loss": 1.1957, "step": 8090 }, { "epoch": 0.68, "grad_norm": 0.8721984624862671, "learning_rate": 9.337252267383272e-05, "loss": 1.1866, "step": 8100 }, { "epoch": 0.68, "grad_norm": 0.9680582284927368, "learning_rate": 9.336412495801142e-05, "loss": 1.1982, "step": 8110 }, { "epoch": 0.68, "grad_norm": 0.8685674071311951, "learning_rate": 9.335572724219012e-05, "loss": 1.1627, "step": 8120 }, { "epoch": 0.68, "grad_norm": 0.8975292444229126, "learning_rate": 9.334732952636884e-05, "loss": 1.1856, "step": 8130 }, { "epoch": 0.68, "grad_norm": 0.8757001161575317, "learning_rate": 9.333893181054754e-05, "loss": 1.1939, "step": 8140 }, { "epoch": 0.68, "grad_norm": 0.9586355090141296, "learning_rate": 9.333053409472624e-05, "loss": 1.1728, "step": 8150 }, { "epoch": 0.68, "grad_norm": 0.8423217535018921, "learning_rate": 9.332213637890494e-05, "loss": 1.1613, "step": 8160 }, { "epoch": 0.68, "grad_norm": 0.884924054145813, "learning_rate": 9.331373866308365e-05, "loss": 1.1895, "step": 8170 }, { "epoch": 0.69, "grad_norm": 0.856117844581604, "learning_rate": 9.330534094726235e-05, "loss": 1.2027, "step": 8180 }, { "epoch": 0.69, "grad_norm": 0.8681750893592834, "learning_rate": 9.329694323144104e-05, "loss": 1.1808, "step": 8190 }, { "epoch": 0.69, "grad_norm": 0.9215348362922668, "learning_rate": 9.328854551561976e-05, "loss": 1.1862, "step": 8200 }, { "epoch": 0.69, "grad_norm": 
0.9275267720222473, "learning_rate": 9.328014779979846e-05, "loss": 1.1984, "step": 8210 }, { "epoch": 0.69, "grad_norm": 0.9127364158630371, "learning_rate": 9.327175008397716e-05, "loss": 1.172, "step": 8220 }, { "epoch": 0.69, "grad_norm": 0.9053615927696228, "learning_rate": 9.326335236815587e-05, "loss": 1.1817, "step": 8230 }, { "epoch": 0.69, "grad_norm": 0.9082298278808594, "learning_rate": 9.325495465233457e-05, "loss": 1.1783, "step": 8240 }, { "epoch": 0.69, "grad_norm": 0.844623327255249, "learning_rate": 9.324655693651327e-05, "loss": 1.2072, "step": 8250 }, { "epoch": 0.69, "grad_norm": 0.8887671828269958, "learning_rate": 9.323815922069197e-05, "loss": 1.1798, "step": 8260 }, { "epoch": 0.69, "grad_norm": 0.9145581722259521, "learning_rate": 9.322976150487069e-05, "loss": 1.1876, "step": 8270 }, { "epoch": 0.69, "grad_norm": 0.8859278559684753, "learning_rate": 9.322136378904939e-05, "loss": 1.1902, "step": 8280 }, { "epoch": 0.69, "grad_norm": 0.8818907737731934, "learning_rate": 9.321296607322809e-05, "loss": 1.1694, "step": 8290 }, { "epoch": 0.7, "grad_norm": 1.0000308752059937, "learning_rate": 9.320456835740679e-05, "loss": 1.1692, "step": 8300 }, { "epoch": 0.7, "grad_norm": 0.8591494560241699, "learning_rate": 9.319617064158549e-05, "loss": 1.1747, "step": 8310 }, { "epoch": 0.7, "grad_norm": 0.8520761728286743, "learning_rate": 9.318777292576419e-05, "loss": 1.2012, "step": 8320 }, { "epoch": 0.7, "grad_norm": 0.9467620253562927, "learning_rate": 9.317937520994289e-05, "loss": 1.1861, "step": 8330 }, { "epoch": 0.7, "grad_norm": 0.8405246734619141, "learning_rate": 9.31709774941216e-05, "loss": 1.1778, "step": 8340 }, { "epoch": 0.7, "grad_norm": 0.9633544683456421, "learning_rate": 9.316257977830031e-05, "loss": 1.1955, "step": 8350 }, { "epoch": 0.7, "grad_norm": 1.0017706155776978, "learning_rate": 9.315418206247901e-05, "loss": 1.1693, "step": 8360 }, { "epoch": 0.7, "grad_norm": 0.8827065825462341, "learning_rate": 
9.314578434665772e-05, "loss": 1.1906, "step": 8370 }, { "epoch": 0.7, "grad_norm": 0.939825177192688, "learning_rate": 9.313738663083642e-05, "loss": 1.1933, "step": 8380 }, { "epoch": 0.7, "grad_norm": 0.9015594720840454, "learning_rate": 9.312898891501512e-05, "loss": 1.1589, "step": 8390 }, { "epoch": 0.7, "grad_norm": 0.8750881552696228, "learning_rate": 9.312059119919382e-05, "loss": 1.1882, "step": 8400 }, { "epoch": 0.71, "grad_norm": 0.8922222256660461, "learning_rate": 9.311219348337252e-05, "loss": 1.1742, "step": 8410 }, { "epoch": 0.71, "grad_norm": 0.8243392705917358, "learning_rate": 9.310379576755123e-05, "loss": 1.1882, "step": 8420 }, { "epoch": 0.71, "grad_norm": 0.9192656874656677, "learning_rate": 9.309539805172993e-05, "loss": 1.2012, "step": 8430 }, { "epoch": 0.71, "grad_norm": 0.8955969214439392, "learning_rate": 9.308700033590864e-05, "loss": 1.1702, "step": 8440 }, { "epoch": 0.71, "grad_norm": 0.9001456499099731, "learning_rate": 9.307860262008734e-05, "loss": 1.164, "step": 8450 }, { "epoch": 0.71, "grad_norm": 0.9576339721679688, "learning_rate": 9.307020490426604e-05, "loss": 1.1681, "step": 8460 }, { "epoch": 0.71, "grad_norm": 0.8319069743156433, "learning_rate": 9.306180718844476e-05, "loss": 1.1865, "step": 8470 }, { "epoch": 0.71, "grad_norm": 0.8679335713386536, "learning_rate": 9.305340947262346e-05, "loss": 1.1936, "step": 8480 }, { "epoch": 0.71, "grad_norm": 0.8643022775650024, "learning_rate": 9.304501175680216e-05, "loss": 1.1775, "step": 8490 }, { "epoch": 0.71, "grad_norm": 0.9259525537490845, "learning_rate": 9.303661404098086e-05, "loss": 1.1834, "step": 8500 }, { "epoch": 0.71, "grad_norm": 0.8675290942192078, "learning_rate": 9.302821632515956e-05, "loss": 1.2006, "step": 8510 }, { "epoch": 0.71, "grad_norm": 0.9066543579101562, "learning_rate": 9.301981860933826e-05, "loss": 1.1676, "step": 8520 }, { "epoch": 0.72, "grad_norm": 0.8691399097442627, "learning_rate": 9.301142089351696e-05, "loss": 1.1869, "step": 8530 
}, { "epoch": 0.72, "grad_norm": 0.9211807250976562, "learning_rate": 9.300302317769567e-05, "loss": 1.158, "step": 8540 }, { "epoch": 0.72, "grad_norm": 0.8890571594238281, "learning_rate": 9.299462546187437e-05, "loss": 1.1683, "step": 8550 }, { "epoch": 0.72, "grad_norm": 0.9606330990791321, "learning_rate": 9.298622774605308e-05, "loss": 1.1803, "step": 8560 }, { "epoch": 0.72, "grad_norm": 0.923599123954773, "learning_rate": 9.297783003023178e-05, "loss": 1.1786, "step": 8570 }, { "epoch": 0.72, "grad_norm": 0.8857382535934448, "learning_rate": 9.296943231441049e-05, "loss": 1.184, "step": 8580 }, { "epoch": 0.72, "grad_norm": 0.9653752446174622, "learning_rate": 9.296103459858919e-05, "loss": 1.1642, "step": 8590 }, { "epoch": 0.72, "grad_norm": 0.9239545464515686, "learning_rate": 9.295263688276789e-05, "loss": 1.1574, "step": 8600 }, { "epoch": 0.72, "grad_norm": 0.933377742767334, "learning_rate": 9.294423916694659e-05, "loss": 1.179, "step": 8610 }, { "epoch": 0.72, "grad_norm": 0.9790053963661194, "learning_rate": 9.29358414511253e-05, "loss": 1.1891, "step": 8620 }, { "epoch": 0.72, "grad_norm": 0.9635185599327087, "learning_rate": 9.2927443735304e-05, "loss": 1.173, "step": 8630 }, { "epoch": 0.72, "grad_norm": 0.8766095638275146, "learning_rate": 9.291904601948271e-05, "loss": 1.1868, "step": 8640 }, { "epoch": 0.73, "grad_norm": 0.8605684638023376, "learning_rate": 9.291064830366141e-05, "loss": 1.1693, "step": 8650 }, { "epoch": 0.73, "grad_norm": 0.9944674968719482, "learning_rate": 9.290225058784011e-05, "loss": 1.1793, "step": 8660 }, { "epoch": 0.73, "grad_norm": 0.9609712958335876, "learning_rate": 9.289385287201881e-05, "loss": 1.1832, "step": 8670 }, { "epoch": 0.73, "grad_norm": 0.9557624459266663, "learning_rate": 9.288545515619752e-05, "loss": 1.1872, "step": 8680 }, { "epoch": 0.73, "grad_norm": 0.8827246427536011, "learning_rate": 9.287705744037623e-05, "loss": 1.1808, "step": 8690 }, { "epoch": 0.73, "grad_norm": 1.0152974128723145, 
"learning_rate": 9.286865972455493e-05, "loss": 1.1783, "step": 8700 }, { "epoch": 0.73, "grad_norm": 0.8559834361076355, "learning_rate": 9.286026200873364e-05, "loss": 1.2048, "step": 8710 }, { "epoch": 0.73, "grad_norm": 0.9328780174255371, "learning_rate": 9.285186429291233e-05, "loss": 1.1695, "step": 8720 }, { "epoch": 0.73, "grad_norm": 0.8733211755752563, "learning_rate": 9.284346657709103e-05, "loss": 1.1889, "step": 8730 }, { "epoch": 0.73, "grad_norm": 0.9180071353912354, "learning_rate": 9.283506886126974e-05, "loss": 1.1722, "step": 8740 }, { "epoch": 0.73, "grad_norm": 0.8515558838844299, "learning_rate": 9.282667114544844e-05, "loss": 1.1453, "step": 8750 }, { "epoch": 0.73, "grad_norm": 0.9535048604011536, "learning_rate": 9.281827342962714e-05, "loss": 1.1899, "step": 8760 }, { "epoch": 0.74, "grad_norm": 0.8584855198860168, "learning_rate": 9.280987571380584e-05, "loss": 1.1743, "step": 8770 }, { "epoch": 0.74, "grad_norm": 0.824908435344696, "learning_rate": 9.280147799798456e-05, "loss": 1.1732, "step": 8780 }, { "epoch": 0.74, "grad_norm": 0.9606408476829529, "learning_rate": 9.279308028216326e-05, "loss": 1.1914, "step": 8790 }, { "epoch": 0.74, "grad_norm": 0.8435086011886597, "learning_rate": 9.278468256634196e-05, "loss": 1.1803, "step": 8800 }, { "epoch": 0.74, "grad_norm": 0.9127106666564941, "learning_rate": 9.277628485052066e-05, "loss": 1.1815, "step": 8810 }, { "epoch": 0.74, "grad_norm": 0.8833587169647217, "learning_rate": 9.276788713469937e-05, "loss": 1.1751, "step": 8820 }, { "epoch": 0.74, "grad_norm": 0.8695579171180725, "learning_rate": 9.275948941887806e-05, "loss": 1.1654, "step": 8830 }, { "epoch": 0.74, "grad_norm": 0.8679482340812683, "learning_rate": 9.275109170305676e-05, "loss": 1.1682, "step": 8840 }, { "epoch": 0.74, "grad_norm": 0.9294605255126953, "learning_rate": 9.274269398723548e-05, "loss": 1.1913, "step": 8850 }, { "epoch": 0.74, "grad_norm": 0.8430615663528442, "learning_rate": 9.273429627141418e-05, "loss": 
1.1837, "step": 8860 }, { "epoch": 0.74, "grad_norm": 0.8895713090896606, "learning_rate": 9.272589855559288e-05, "loss": 1.169, "step": 8870 }, { "epoch": 0.74, "grad_norm": 0.9212405681610107, "learning_rate": 9.271750083977159e-05, "loss": 1.1847, "step": 8880 }, { "epoch": 0.75, "grad_norm": 0.814430296421051, "learning_rate": 9.27091031239503e-05, "loss": 1.1888, "step": 8890 }, { "epoch": 0.75, "grad_norm": 0.8758382797241211, "learning_rate": 9.2700705408129e-05, "loss": 1.1532, "step": 8900 }, { "epoch": 0.75, "grad_norm": 0.9425457119941711, "learning_rate": 9.26923076923077e-05, "loss": 1.1749, "step": 8910 }, { "epoch": 0.75, "grad_norm": 0.9039488434791565, "learning_rate": 9.268390997648641e-05, "loss": 1.1956, "step": 8920 }, { "epoch": 0.75, "grad_norm": 0.8784694075584412, "learning_rate": 9.26755122606651e-05, "loss": 1.1836, "step": 8930 }, { "epoch": 0.75, "grad_norm": 0.9849941730499268, "learning_rate": 9.26671145448438e-05, "loss": 1.1751, "step": 8940 }, { "epoch": 0.75, "grad_norm": 0.9492660164833069, "learning_rate": 9.265871682902251e-05, "loss": 1.1705, "step": 8950 }, { "epoch": 0.75, "grad_norm": 0.9221577048301697, "learning_rate": 9.265031911320121e-05, "loss": 1.1637, "step": 8960 }, { "epoch": 0.75, "grad_norm": 0.910025954246521, "learning_rate": 9.264192139737991e-05, "loss": 1.2021, "step": 8970 }, { "epoch": 0.75, "grad_norm": 0.950025737285614, "learning_rate": 9.263352368155863e-05, "loss": 1.153, "step": 8980 }, { "epoch": 0.75, "grad_norm": 0.8990755081176758, "learning_rate": 9.262512596573733e-05, "loss": 1.1666, "step": 8990 }, { "epoch": 0.75, "grad_norm": 0.961871862411499, "learning_rate": 9.261672824991603e-05, "loss": 1.1778, "step": 9000 }, { "epoch": 0.76, "grad_norm": 0.9348917603492737, "learning_rate": 9.260833053409473e-05, "loss": 1.1706, "step": 9010 }, { "epoch": 0.76, "grad_norm": 0.9270437359809875, "learning_rate": 9.259993281827344e-05, "loss": 1.1743, "step": 9020 }, { "epoch": 0.76, "grad_norm": 
0.9181728959083557, "learning_rate": 9.259153510245214e-05, "loss": 1.2038, "step": 9030 }, { "epoch": 0.76, "grad_norm": 1.3287311792373657, "learning_rate": 9.258313738663083e-05, "loss": 1.1703, "step": 9040 }, { "epoch": 0.76, "grad_norm": 0.8790122866630554, "learning_rate": 9.257473967080954e-05, "loss": 1.1765, "step": 9050 }, { "epoch": 0.76, "grad_norm": 0.9112759232521057, "learning_rate": 9.256634195498825e-05, "loss": 1.1792, "step": 9060 }, { "epoch": 0.76, "grad_norm": 0.9480587244033813, "learning_rate": 9.255794423916695e-05, "loss": 1.1682, "step": 9070 }, { "epoch": 0.76, "grad_norm": 0.9825183749198914, "learning_rate": 9.254954652334566e-05, "loss": 1.1682, "step": 9080 }, { "epoch": 0.76, "grad_norm": 0.9880927205085754, "learning_rate": 9.254114880752436e-05, "loss": 1.1512, "step": 9090 }, { "epoch": 0.76, "grad_norm": 0.9363360404968262, "learning_rate": 9.253275109170306e-05, "loss": 1.1675, "step": 9100 }, { "epoch": 0.76, "grad_norm": 0.9263131618499756, "learning_rate": 9.252435337588176e-05, "loss": 1.1895, "step": 9110 }, { "epoch": 0.76, "grad_norm": 0.8793935179710388, "learning_rate": 9.251595566006048e-05, "loss": 1.1815, "step": 9120 }, { "epoch": 0.77, "grad_norm": 0.9015591740608215, "learning_rate": 9.250755794423918e-05, "loss": 1.2022, "step": 9130 }, { "epoch": 0.77, "grad_norm": 0.8961725831031799, "learning_rate": 9.249916022841786e-05, "loss": 1.1599, "step": 9140 }, { "epoch": 0.77, "grad_norm": 0.8976597189903259, "learning_rate": 9.249076251259658e-05, "loss": 1.1735, "step": 9150 }, { "epoch": 0.77, "grad_norm": 0.8971372246742249, "learning_rate": 9.248236479677528e-05, "loss": 1.1544, "step": 9160 }, { "epoch": 0.77, "grad_norm": 1.0064512491226196, "learning_rate": 9.247396708095398e-05, "loss": 1.1762, "step": 9170 }, { "epoch": 0.77, "grad_norm": 0.9865056872367859, "learning_rate": 9.246556936513268e-05, "loss": 1.1849, "step": 9180 }, { "epoch": 0.77, "grad_norm": 0.8811156153678894, "learning_rate": 
9.24571716493114e-05, "loss": 1.1826, "step": 9190 }, { "epoch": 0.77, "grad_norm": 0.9140886664390564, "learning_rate": 9.24487739334901e-05, "loss": 1.1834, "step": 9200 }, { "epoch": 0.77, "grad_norm": 0.8452087044715881, "learning_rate": 9.24403762176688e-05, "loss": 1.1668, "step": 9210 }, { "epoch": 0.77, "grad_norm": 0.9415846467018127, "learning_rate": 9.243197850184751e-05, "loss": 1.1692, "step": 9220 }, { "epoch": 0.77, "grad_norm": 0.9672451615333557, "learning_rate": 9.242358078602621e-05, "loss": 1.1521, "step": 9230 }, { "epoch": 0.77, "grad_norm": 0.9130426049232483, "learning_rate": 9.241518307020491e-05, "loss": 1.189, "step": 9240 }, { "epoch": 0.78, "grad_norm": 0.8777864575386047, "learning_rate": 9.240678535438361e-05, "loss": 1.1851, "step": 9250 }, { "epoch": 0.78, "grad_norm": 0.9119729399681091, "learning_rate": 9.239838763856231e-05, "loss": 1.1579, "step": 9260 }, { "epoch": 0.78, "grad_norm": 0.889671802520752, "learning_rate": 9.238998992274101e-05, "loss": 1.1705, "step": 9270 }, { "epoch": 0.78, "grad_norm": 0.9760835766792297, "learning_rate": 9.238159220691971e-05, "loss": 1.1775, "step": 9280 }, { "epoch": 0.78, "grad_norm": 0.8995108008384705, "learning_rate": 9.237319449109843e-05, "loss": 1.1662, "step": 9290 }, { "epoch": 0.78, "grad_norm": 0.81166011095047, "learning_rate": 9.236479677527713e-05, "loss": 1.1838, "step": 9300 }, { "epoch": 0.78, "grad_norm": 0.9276900291442871, "learning_rate": 9.235639905945583e-05, "loss": 1.1678, "step": 9310 }, { "epoch": 0.78, "grad_norm": 0.8583055138587952, "learning_rate": 9.234800134363454e-05, "loss": 1.1623, "step": 9320 }, { "epoch": 0.78, "grad_norm": 0.9347761869430542, "learning_rate": 9.233960362781325e-05, "loss": 1.1803, "step": 9330 }, { "epoch": 0.78, "grad_norm": 0.9358561038970947, "learning_rate": 9.233120591199195e-05, "loss": 1.1681, "step": 9340 }, { "epoch": 0.78, "grad_norm": 0.8344528675079346, "learning_rate": 9.232280819617065e-05, "loss": 1.1495, "step": 9350 }, 
{ "epoch": 0.78, "grad_norm": 0.8631145358085632, "learning_rate": 9.231441048034935e-05, "loss": 1.1783, "step": 9360 }, { "epoch": 0.79, "grad_norm": 0.8783475756645203, "learning_rate": 9.230601276452805e-05, "loss": 1.1714, "step": 9370 }, { "epoch": 0.79, "grad_norm": 0.9026092886924744, "learning_rate": 9.229761504870675e-05, "loss": 1.1732, "step": 9380 }, { "epoch": 0.79, "grad_norm": 0.9233419895172119, "learning_rate": 9.228921733288546e-05, "loss": 1.1703, "step": 9390 }, { "epoch": 0.79, "grad_norm": 0.8974713683128357, "learning_rate": 9.228081961706416e-05, "loss": 1.1636, "step": 9400 }, { "epoch": 0.79, "grad_norm": 0.9101700782775879, "learning_rate": 9.227242190124286e-05, "loss": 1.1935, "step": 9410 }, { "epoch": 0.79, "grad_norm": 0.9138025045394897, "learning_rate": 9.226402418542157e-05, "loss": 1.1512, "step": 9420 }, { "epoch": 0.79, "grad_norm": 0.9470272660255432, "learning_rate": 9.225562646960028e-05, "loss": 1.1786, "step": 9430 }, { "epoch": 0.79, "grad_norm": 0.9542033076286316, "learning_rate": 9.224722875377898e-05, "loss": 1.183, "step": 9440 }, { "epoch": 0.79, "grad_norm": 0.8865834474563599, "learning_rate": 9.223883103795768e-05, "loss": 1.1781, "step": 9450 }, { "epoch": 0.79, "grad_norm": 0.901871919631958, "learning_rate": 9.223043332213638e-05, "loss": 1.1771, "step": 9460 }, { "epoch": 0.79, "grad_norm": 0.9759643077850342, "learning_rate": 9.222203560631508e-05, "loss": 1.1702, "step": 9470 }, { "epoch": 0.79, "grad_norm": 0.9939475655555725, "learning_rate": 9.221363789049378e-05, "loss": 1.1553, "step": 9480 }, { "epoch": 0.8, "grad_norm": 0.939246416091919, "learning_rate": 9.22052401746725e-05, "loss": 1.1614, "step": 9490 }, { "epoch": 0.8, "grad_norm": 0.9197496175765991, "learning_rate": 9.21968424588512e-05, "loss": 1.1485, "step": 9500 }, { "epoch": 0.8, "grad_norm": 0.8700929880142212, "learning_rate": 9.21884447430299e-05, "loss": 1.1646, "step": 9510 }, { "epoch": 0.8, "grad_norm": 0.8767945170402527, 
"learning_rate": 9.21800470272086e-05, "loss": 1.1616, "step": 9520 }, { "epoch": 0.8, "grad_norm": 0.8359785079956055, "learning_rate": 9.217164931138731e-05, "loss": 1.1835, "step": 9530 }, { "epoch": 0.8, "grad_norm": 0.865118682384491, "learning_rate": 9.216325159556601e-05, "loss": 1.1891, "step": 9540 }, { "epoch": 0.8, "grad_norm": 1.0089035034179688, "learning_rate": 9.215485387974471e-05, "loss": 1.1802, "step": 9550 }, { "epoch": 0.8, "grad_norm": 0.9262118339538574, "learning_rate": 9.214645616392343e-05, "loss": 1.1637, "step": 9560 }, { "epoch": 0.8, "grad_norm": 0.9462135434150696, "learning_rate": 9.213805844810212e-05, "loss": 1.1884, "step": 9570 }, { "epoch": 0.8, "grad_norm": 0.8781193494796753, "learning_rate": 9.212966073228082e-05, "loss": 1.1789, "step": 9580 }, { "epoch": 0.8, "grad_norm": 0.8772711753845215, "learning_rate": 9.212126301645953e-05, "loss": 1.1745, "step": 9590 }, { "epoch": 0.8, "grad_norm": 0.9201531410217285, "learning_rate": 9.211286530063823e-05, "loss": 1.1797, "step": 9600 }, { "epoch": 0.81, "grad_norm": 0.8998798131942749, "learning_rate": 9.210446758481693e-05, "loss": 1.1627, "step": 9610 }, { "epoch": 0.81, "grad_norm": 0.8889737725257874, "learning_rate": 9.209606986899563e-05, "loss": 1.1745, "step": 9620 }, { "epoch": 0.81, "grad_norm": 0.959130585193634, "learning_rate": 9.208767215317435e-05, "loss": 1.1554, "step": 9630 }, { "epoch": 0.81, "grad_norm": 0.8671838045120239, "learning_rate": 9.207927443735305e-05, "loss": 1.1606, "step": 9640 }, { "epoch": 0.81, "grad_norm": 0.8932032585144043, "learning_rate": 9.207087672153175e-05, "loss": 1.1815, "step": 9650 }, { "epoch": 0.81, "grad_norm": 0.9338779449462891, "learning_rate": 9.206247900571045e-05, "loss": 1.1754, "step": 9660 }, { "epoch": 0.81, "grad_norm": 0.9668774604797363, "learning_rate": 9.205408128988915e-05, "loss": 1.1867, "step": 9670 }, { "epoch": 0.81, "grad_norm": 0.8983999490737915, "learning_rate": 9.204568357406785e-05, "loss": 1.1837, 
"step": 9680 }, { "epoch": 0.81, "grad_norm": 0.9296559691429138, "learning_rate": 9.203728585824655e-05, "loss": 1.1728, "step": 9690 }, { "epoch": 0.81, "grad_norm": 0.9125829935073853, "learning_rate": 9.202888814242527e-05, "loss": 1.201, "step": 9700 }, { "epoch": 0.81, "grad_norm": 0.9010559916496277, "learning_rate": 9.202049042660397e-05, "loss": 1.1753, "step": 9710 }, { "epoch": 0.81, "grad_norm": 0.9880275726318359, "learning_rate": 9.201209271078267e-05, "loss": 1.187, "step": 9720 }, { "epoch": 0.82, "grad_norm": 0.9273577332496643, "learning_rate": 9.200369499496138e-05, "loss": 1.1558, "step": 9730 }, { "epoch": 0.82, "grad_norm": 0.9076495170593262, "learning_rate": 9.199529727914008e-05, "loss": 1.1535, "step": 9740 }, { "epoch": 0.82, "grad_norm": 0.9300388693809509, "learning_rate": 9.198689956331878e-05, "loss": 1.1452, "step": 9750 }, { "epoch": 0.82, "grad_norm": 0.8782075047492981, "learning_rate": 9.197850184749748e-05, "loss": 1.1606, "step": 9760 }, { "epoch": 0.82, "grad_norm": 0.8622875809669495, "learning_rate": 9.19701041316762e-05, "loss": 1.1505, "step": 9770 }, { "epoch": 0.82, "grad_norm": 0.9150767922401428, "learning_rate": 9.196170641585488e-05, "loss": 1.1427, "step": 9780 }, { "epoch": 0.82, "grad_norm": 0.8404105305671692, "learning_rate": 9.195330870003359e-05, "loss": 1.1717, "step": 9790 }, { "epoch": 0.82, "grad_norm": 0.9195795059204102, "learning_rate": 9.19449109842123e-05, "loss": 1.1623, "step": 9800 }, { "epoch": 0.82, "grad_norm": 0.9158319234848022, "learning_rate": 9.1936513268391e-05, "loss": 1.1797, "step": 9810 }, { "epoch": 0.82, "grad_norm": 0.859521210193634, "learning_rate": 9.19281155525697e-05, "loss": 1.1862, "step": 9820 }, { "epoch": 0.82, "grad_norm": 0.876270055770874, "learning_rate": 9.191971783674842e-05, "loss": 1.1652, "step": 9830 }, { "epoch": 0.82, "grad_norm": 0.8563090562820435, "learning_rate": 9.191132012092712e-05, "loss": 1.1576, "step": 9840 }, { "epoch": 0.83, "grad_norm": 
0.7962527275085449, "learning_rate": 9.190292240510582e-05, "loss": 1.1742, "step": 9850 }, { "epoch": 0.83, "grad_norm": 0.9961697459220886, "learning_rate": 9.189452468928452e-05, "loss": 1.1736, "step": 9860 }, { "epoch": 0.83, "grad_norm": 0.9126843810081482, "learning_rate": 9.188612697346323e-05, "loss": 1.1671, "step": 9870 }, { "epoch": 0.83, "grad_norm": 0.9100418090820312, "learning_rate": 9.187772925764193e-05, "loss": 1.1627, "step": 9880 }, { "epoch": 0.83, "grad_norm": 0.9611273407936096, "learning_rate": 9.186933154182062e-05, "loss": 1.2006, "step": 9890 }, { "epoch": 0.83, "grad_norm": 0.8314115405082703, "learning_rate": 9.186093382599933e-05, "loss": 1.1535, "step": 9900 }, { "epoch": 0.83, "grad_norm": 0.8938583135604858, "learning_rate": 9.185253611017803e-05, "loss": 1.1356, "step": 9910 }, { "epoch": 0.83, "grad_norm": 0.8760045170783997, "learning_rate": 9.184413839435674e-05, "loss": 1.1729, "step": 9920 }, { "epoch": 0.83, "grad_norm": 0.9797518253326416, "learning_rate": 9.183574067853544e-05, "loss": 1.1655, "step": 9930 }, { "epoch": 0.83, "grad_norm": 0.9288764595985413, "learning_rate": 9.182734296271415e-05, "loss": 1.1553, "step": 9940 }, { "epoch": 0.83, "grad_norm": 0.8398875594139099, "learning_rate": 9.181894524689285e-05, "loss": 1.1733, "step": 9950 }, { "epoch": 0.83, "grad_norm": 0.9302919507026672, "learning_rate": 9.181054753107155e-05, "loss": 1.1741, "step": 9960 }, { "epoch": 0.84, "grad_norm": 0.9887378215789795, "learning_rate": 9.180214981525027e-05, "loss": 1.1858, "step": 9970 }, { "epoch": 0.84, "grad_norm": 0.8962682485580444, "learning_rate": 9.179375209942897e-05, "loss": 1.185, "step": 9980 }, { "epoch": 0.84, "grad_norm": 0.9205465912818909, "learning_rate": 9.178535438360765e-05, "loss": 1.1588, "step": 9990 }, { "epoch": 0.84, "grad_norm": 0.8461117148399353, "learning_rate": 9.177695666778637e-05, "loss": 1.1647, "step": 10000 }, { "epoch": 0.84, "grad_norm": 0.8444212675094604, "learning_rate": 
9.176855895196507e-05, "loss": 1.1676, "step": 10010 }, { "epoch": 0.84, "grad_norm": 0.8764084577560425, "learning_rate": 9.176016123614377e-05, "loss": 1.1807, "step": 10020 }, { "epoch": 0.84, "grad_norm": 0.9083003997802734, "learning_rate": 9.175176352032247e-05, "loss": 1.1883, "step": 10030 }, { "epoch": 0.84, "grad_norm": 0.9371087551116943, "learning_rate": 9.174336580450118e-05, "loss": 1.1619, "step": 10040 }, { "epoch": 0.84, "grad_norm": 0.7800226807594299, "learning_rate": 9.173496808867988e-05, "loss": 1.1596, "step": 10050 }, { "epoch": 0.84, "grad_norm": 0.9080156087875366, "learning_rate": 9.172657037285859e-05, "loss": 1.1919, "step": 10060 }, { "epoch": 0.84, "grad_norm": 0.9327325224876404, "learning_rate": 9.17181726570373e-05, "loss": 1.1572, "step": 10070 }, { "epoch": 0.85, "grad_norm": 0.9094728231430054, "learning_rate": 9.1709774941216e-05, "loss": 1.1813, "step": 10080 }, { "epoch": 0.85, "grad_norm": 0.922223687171936, "learning_rate": 9.17013772253947e-05, "loss": 1.1788, "step": 10090 }, { "epoch": 0.85, "grad_norm": 0.9285407662391663, "learning_rate": 9.16929795095734e-05, "loss": 1.1764, "step": 10100 }, { "epoch": 0.85, "grad_norm": 0.9567670226097107, "learning_rate": 9.16845817937521e-05, "loss": 1.1673, "step": 10110 }, { "epoch": 0.85, "grad_norm": 0.9216251373291016, "learning_rate": 9.16761840779308e-05, "loss": 1.1586, "step": 10120 }, { "epoch": 0.85, "grad_norm": 0.8563306927680969, "learning_rate": 9.16677863621095e-05, "loss": 1.1696, "step": 10130 }, { "epoch": 0.85, "grad_norm": 0.8764593601226807, "learning_rate": 9.165938864628822e-05, "loss": 1.1917, "step": 10140 }, { "epoch": 0.85, "grad_norm": 0.8917428255081177, "learning_rate": 9.165099093046692e-05, "loss": 1.1826, "step": 10150 }, { "epoch": 0.85, "grad_norm": 0.8859480619430542, "learning_rate": 9.164259321464562e-05, "loss": 1.1565, "step": 10160 }, { "epoch": 0.85, "grad_norm": 0.8752598762512207, "learning_rate": 9.163419549882432e-05, "loss": 1.1547, 
"step": 10170 }, { "epoch": 0.85, "grad_norm": 0.9562612771987915, "learning_rate": 9.162579778300303e-05, "loss": 1.1731, "step": 10180 }, { "epoch": 0.85, "grad_norm": 0.9379001259803772, "learning_rate": 9.161740006718174e-05, "loss": 1.1668, "step": 10190 }, { "epoch": 0.86, "grad_norm": 0.9116417169570923, "learning_rate": 9.160900235136042e-05, "loss": 1.1662, "step": 10200 }, { "epoch": 0.86, "grad_norm": 0.8588632941246033, "learning_rate": 9.160060463553914e-05, "loss": 1.1424, "step": 10210 }, { "epoch": 0.86, "grad_norm": 0.9671397805213928, "learning_rate": 9.159220691971784e-05, "loss": 1.1892, "step": 10220 }, { "epoch": 0.86, "grad_norm": 0.845672070980072, "learning_rate": 9.158380920389654e-05, "loss": 1.1758, "step": 10230 }, { "epoch": 0.86, "grad_norm": 0.9316462278366089, "learning_rate": 9.157541148807525e-05, "loss": 1.168, "step": 10240 }, { "epoch": 0.86, "grad_norm": 0.8582399487495422, "learning_rate": 9.156701377225395e-05, "loss": 1.1648, "step": 10250 }, { "epoch": 0.86, "grad_norm": 0.8619750142097473, "learning_rate": 9.155861605643265e-05, "loss": 1.1883, "step": 10260 }, { "epoch": 0.86, "grad_norm": 0.935123085975647, "learning_rate": 9.155021834061135e-05, "loss": 1.1745, "step": 10270 }, { "epoch": 0.86, "grad_norm": 0.8686178922653198, "learning_rate": 9.154182062479007e-05, "loss": 1.1724, "step": 10280 }, { "epoch": 0.86, "grad_norm": 0.9323543906211853, "learning_rate": 9.153342290896877e-05, "loss": 1.1667, "step": 10290 }, { "epoch": 0.86, "grad_norm": 0.9259883761405945, "learning_rate": 9.152502519314747e-05, "loss": 1.1531, "step": 10300 }, { "epoch": 0.86, "grad_norm": 0.9095200896263123, "learning_rate": 9.151662747732617e-05, "loss": 1.1697, "step": 10310 }, { "epoch": 0.87, "grad_norm": 0.9084714651107788, "learning_rate": 9.150822976150487e-05, "loss": 1.1824, "step": 10320 }, { "epoch": 0.87, "grad_norm": 0.9516263008117676, "learning_rate": 9.149983204568357e-05, "loss": 1.156, "step": 10330 }, { "epoch": 0.87, 
"grad_norm": 0.9567800760269165, "learning_rate": 9.149143432986229e-05, "loss": 1.1637, "step": 10340 }, { "epoch": 0.87, "grad_norm": 0.9155131578445435, "learning_rate": 9.148303661404099e-05, "loss": 1.1637, "step": 10350 }, { "epoch": 0.87, "grad_norm": 0.8505834937095642, "learning_rate": 9.147463889821969e-05, "loss": 1.1756, "step": 10360 }, { "epoch": 0.87, "grad_norm": 0.8978041410446167, "learning_rate": 9.146624118239839e-05, "loss": 1.1489, "step": 10370 }, { "epoch": 0.87, "grad_norm": 0.9028456807136536, "learning_rate": 9.14578434665771e-05, "loss": 1.1482, "step": 10380 }, { "epoch": 0.87, "grad_norm": 0.9012831449508667, "learning_rate": 9.14494457507558e-05, "loss": 1.184, "step": 10390 }, { "epoch": 0.87, "grad_norm": 0.9119836091995239, "learning_rate": 9.14410480349345e-05, "loss": 1.149, "step": 10400 }, { "epoch": 0.87, "grad_norm": 0.8373876214027405, "learning_rate": 9.14326503191132e-05, "loss": 1.171, "step": 10410 }, { "epoch": 0.87, "grad_norm": 0.9856306314468384, "learning_rate": 9.14242526032919e-05, "loss": 1.1641, "step": 10420 }, { "epoch": 0.87, "grad_norm": 0.9325682520866394, "learning_rate": 9.14158548874706e-05, "loss": 1.1481, "step": 10430 }, { "epoch": 0.88, "grad_norm": 0.9177975654602051, "learning_rate": 9.14074571716493e-05, "loss": 1.172, "step": 10440 }, { "epoch": 0.88, "grad_norm": 0.8825708627700806, "learning_rate": 9.139905945582802e-05, "loss": 1.1624, "step": 10450 }, { "epoch": 0.88, "grad_norm": 0.8930522203445435, "learning_rate": 9.139066174000672e-05, "loss": 1.1812, "step": 10460 }, { "epoch": 0.88, "grad_norm": 0.957118809223175, "learning_rate": 9.138226402418542e-05, "loss": 1.165, "step": 10470 }, { "epoch": 0.88, "grad_norm": 0.9745475649833679, "learning_rate": 9.137386630836414e-05, "loss": 1.1507, "step": 10480 }, { "epoch": 0.88, "grad_norm": 1.0125019550323486, "learning_rate": 9.136546859254284e-05, "loss": 1.1545, "step": 10490 }, { "epoch": 0.88, "grad_norm": 0.8771555423736572, 
"learning_rate": 9.135707087672154e-05, "loss": 1.1521, "step": 10500 }, { "epoch": 0.88, "grad_norm": 0.8748019933700562, "learning_rate": 9.134867316090024e-05, "loss": 1.14, "step": 10510 }, { "epoch": 0.88, "grad_norm": 0.8950918316841125, "learning_rate": 9.134027544507894e-05, "loss": 1.1486, "step": 10520 }, { "epoch": 0.88, "grad_norm": 0.9525067210197449, "learning_rate": 9.133187772925764e-05, "loss": 1.1501, "step": 10530 }, { "epoch": 0.88, "grad_norm": 0.8265681266784668, "learning_rate": 9.132348001343634e-05, "loss": 1.1605, "step": 10540 }, { "epoch": 0.88, "grad_norm": 0.9634080529212952, "learning_rate": 9.131508229761505e-05, "loss": 1.1561, "step": 10550 }, { "epoch": 0.89, "grad_norm": 0.8635426759719849, "learning_rate": 9.130668458179376e-05, "loss": 1.1696, "step": 10560 }, { "epoch": 0.89, "grad_norm": 0.994739294052124, "learning_rate": 9.129828686597246e-05, "loss": 1.1778, "step": 10570 }, { "epoch": 0.89, "grad_norm": 0.89042729139328, "learning_rate": 9.128988915015117e-05, "loss": 1.1667, "step": 10580 }, { "epoch": 0.89, "grad_norm": 0.868971586227417, "learning_rate": 9.128149143432987e-05, "loss": 1.1775, "step": 10590 }, { "epoch": 0.89, "grad_norm": 0.9349837899208069, "learning_rate": 9.127309371850857e-05, "loss": 1.1653, "step": 10600 }, { "epoch": 0.89, "grad_norm": 0.9134425520896912, "learning_rate": 9.126469600268727e-05, "loss": 1.1641, "step": 10610 }, { "epoch": 0.89, "grad_norm": 0.9141260385513306, "learning_rate": 9.125629828686599e-05, "loss": 1.1789, "step": 10620 }, { "epoch": 0.89, "grad_norm": 0.9114556908607483, "learning_rate": 9.124790057104467e-05, "loss": 1.1265, "step": 10630 }, { "epoch": 0.89, "grad_norm": 0.8724734783172607, "learning_rate": 9.123950285522337e-05, "loss": 1.1798, "step": 10640 }, { "epoch": 0.89, "grad_norm": 0.8508509993553162, "learning_rate": 9.123110513940209e-05, "loss": 1.1806, "step": 10650 }, { "epoch": 0.89, "grad_norm": 0.8755638003349304, "learning_rate": 
9.122270742358079e-05, "loss": 1.1511, "step": 10660 }, { "epoch": 0.89, "grad_norm": 0.9825658798217773, "learning_rate": 9.121430970775949e-05, "loss": 1.1432, "step": 10670 }, { "epoch": 0.9, "grad_norm": 0.8838762044906616, "learning_rate": 9.120591199193819e-05, "loss": 1.1659, "step": 10680 }, { "epoch": 0.9, "grad_norm": 0.9895627498626709, "learning_rate": 9.11975142761169e-05, "loss": 1.1722, "step": 10690 }, { "epoch": 0.9, "grad_norm": 0.9133514761924744, "learning_rate": 9.11891165602956e-05, "loss": 1.1597, "step": 10700 }, { "epoch": 0.9, "grad_norm": 0.9397072196006775, "learning_rate": 9.11807188444743e-05, "loss": 1.1699, "step": 10710 }, { "epoch": 0.9, "grad_norm": 0.9063377380371094, "learning_rate": 9.117232112865302e-05, "loss": 1.1468, "step": 10720 }, { "epoch": 0.9, "grad_norm": 0.8843746781349182, "learning_rate": 9.116392341283171e-05, "loss": 1.167, "step": 10730 }, { "epoch": 0.9, "grad_norm": 0.9365702867507935, "learning_rate": 9.115552569701041e-05, "loss": 1.1658, "step": 10740 }, { "epoch": 0.9, "grad_norm": 0.9855867624282837, "learning_rate": 9.114712798118912e-05, "loss": 1.1822, "step": 10750 }, { "epoch": 0.9, "grad_norm": 0.8695436120033264, "learning_rate": 9.113873026536782e-05, "loss": 1.1581, "step": 10760 }, { "epoch": 0.9, "grad_norm": 0.9232392311096191, "learning_rate": 9.113033254954652e-05, "loss": 1.154, "step": 10770 }, { "epoch": 0.9, "grad_norm": 0.9410752654075623, "learning_rate": 9.112193483372522e-05, "loss": 1.1388, "step": 10780 }, { "epoch": 0.9, "grad_norm": 0.9676739573478699, "learning_rate": 9.111353711790394e-05, "loss": 1.1589, "step": 10790 }, { "epoch": 0.91, "grad_norm": 0.8949344754219055, "learning_rate": 9.110513940208264e-05, "loss": 1.1707, "step": 10800 }, { "epoch": 0.91, "grad_norm": 0.9615799188613892, "learning_rate": 9.109674168626134e-05, "loss": 1.1523, "step": 10810 }, { "epoch": 0.91, "grad_norm": 0.9283060431480408, "learning_rate": 9.108834397044005e-05, "loss": 1.1549, "step": 
10820 }, { "epoch": 0.91, "grad_norm": 0.9590465426445007, "learning_rate": 9.107994625461876e-05, "loss": 1.1803, "step": 10830 }, { "epoch": 0.91, "grad_norm": 0.908495306968689, "learning_rate": 9.107154853879744e-05, "loss": 1.1709, "step": 10840 }, { "epoch": 0.91, "grad_norm": 0.9319912791252136, "learning_rate": 9.106315082297616e-05, "loss": 1.1479, "step": 10850 }, { "epoch": 0.91, "grad_norm": 0.9135957360267639, "learning_rate": 9.105475310715486e-05, "loss": 1.1666, "step": 10860 }, { "epoch": 0.91, "grad_norm": 0.8900753259658813, "learning_rate": 9.104635539133356e-05, "loss": 1.1434, "step": 10870 }, { "epoch": 0.91, "grad_norm": 0.8928583860397339, "learning_rate": 9.103795767551226e-05, "loss": 1.151, "step": 10880 }, { "epoch": 0.91, "grad_norm": 0.9014522433280945, "learning_rate": 9.102955995969097e-05, "loss": 1.1611, "step": 10890 }, { "epoch": 0.91, "grad_norm": 0.9392279386520386, "learning_rate": 9.102116224386967e-05, "loss": 1.1478, "step": 10900 }, { "epoch": 0.91, "grad_norm": 0.9129185080528259, "learning_rate": 9.101276452804837e-05, "loss": 1.1241, "step": 10910 }, { "epoch": 0.92, "grad_norm": 0.9602782726287842, "learning_rate": 9.100436681222709e-05, "loss": 1.158, "step": 10920 }, { "epoch": 0.92, "grad_norm": 0.955150306224823, "learning_rate": 9.099596909640579e-05, "loss": 1.1292, "step": 10930 }, { "epoch": 0.92, "grad_norm": 0.9290515184402466, "learning_rate": 9.098757138058449e-05, "loss": 1.1485, "step": 10940 }, { "epoch": 0.92, "grad_norm": 0.9749833941459656, "learning_rate": 9.097917366476319e-05, "loss": 1.1289, "step": 10950 }, { "epoch": 0.92, "grad_norm": 0.8840381503105164, "learning_rate": 9.097077594894189e-05, "loss": 1.1734, "step": 10960 }, { "epoch": 0.92, "grad_norm": 0.852929413318634, "learning_rate": 9.096237823312059e-05, "loss": 1.1719, "step": 10970 }, { "epoch": 0.92, "grad_norm": 0.9287542104721069, "learning_rate": 9.095398051729929e-05, "loss": 1.1687, "step": 10980 }, { "epoch": 0.92, 
"grad_norm": 0.9062138199806213, "learning_rate": 9.094558280147801e-05, "loss": 1.1606, "step": 10990 }, { "epoch": 0.92, "grad_norm": 0.8329747319221497, "learning_rate": 9.093718508565671e-05, "loss": 1.1551, "step": 11000 }, { "epoch": 0.92, "grad_norm": 1.0348055362701416, "learning_rate": 9.092878736983541e-05, "loss": 1.162, "step": 11010 }, { "epoch": 0.92, "grad_norm": 0.895537257194519, "learning_rate": 9.092038965401411e-05, "loss": 1.1596, "step": 11020 }, { "epoch": 0.92, "grad_norm": 0.895803689956665, "learning_rate": 9.091199193819282e-05, "loss": 1.1467, "step": 11030 }, { "epoch": 0.93, "grad_norm": 0.8784132599830627, "learning_rate": 9.090443399395366e-05, "loss": 1.1938, "step": 11040 }, { "epoch": 0.93, "grad_norm": 0.8735492825508118, "learning_rate": 9.089603627813236e-05, "loss": 1.158, "step": 11050 }, { "epoch": 0.93, "grad_norm": 0.8453689217567444, "learning_rate": 9.088763856231105e-05, "loss": 1.163, "step": 11060 }, { "epoch": 0.93, "grad_norm": 0.9577978849411011, "learning_rate": 9.087924084648976e-05, "loss": 1.1553, "step": 11070 }, { "epoch": 0.93, "grad_norm": 0.9026578068733215, "learning_rate": 9.087084313066846e-05, "loss": 1.1663, "step": 11080 }, { "epoch": 0.93, "grad_norm": 1.0003416538238525, "learning_rate": 9.086244541484716e-05, "loss": 1.1512, "step": 11090 }, { "epoch": 0.93, "grad_norm": 0.8842092156410217, "learning_rate": 9.085404769902586e-05, "loss": 1.1562, "step": 11100 }, { "epoch": 0.93, "grad_norm": 0.9195184111595154, "learning_rate": 9.084564998320458e-05, "loss": 1.1726, "step": 11110 }, { "epoch": 0.93, "grad_norm": 0.8893963694572449, "learning_rate": 9.083725226738328e-05, "loss": 1.1565, "step": 11120 }, { "epoch": 0.93, "grad_norm": 0.9130131006240845, "learning_rate": 9.082885455156198e-05, "loss": 1.1418, "step": 11130 }, { "epoch": 0.93, "grad_norm": 0.8881024122238159, "learning_rate": 9.082045683574069e-05, "loss": 1.1546, "step": 11140 }, { "epoch": 0.93, "grad_norm": 0.9244464039802551, 
"learning_rate": 9.081205911991939e-05, "loss": 1.1404, "step": 11150 }, { "epoch": 0.94, "grad_norm": 0.9388617873191833, "learning_rate": 9.08036614040981e-05, "loss": 1.1541, "step": 11160 }, { "epoch": 0.94, "grad_norm": 0.936011016368866, "learning_rate": 9.07952636882768e-05, "loss": 1.1812, "step": 11170 }, { "epoch": 0.94, "grad_norm": 0.8580996990203857, "learning_rate": 9.07868659724555e-05, "loss": 1.1703, "step": 11180 }, { "epoch": 0.94, "grad_norm": 0.8237022161483765, "learning_rate": 9.07784682566342e-05, "loss": 1.1398, "step": 11190 }, { "epoch": 0.94, "grad_norm": 0.8908625245094299, "learning_rate": 9.07700705408129e-05, "loss": 1.1771, "step": 11200 }, { "epoch": 0.94, "grad_norm": 0.8714804649353027, "learning_rate": 9.076167282499161e-05, "loss": 1.1444, "step": 11210 }, { "epoch": 0.94, "grad_norm": 0.9208613634109497, "learning_rate": 9.075327510917031e-05, "loss": 1.141, "step": 11220 }, { "epoch": 0.94, "grad_norm": 0.9038468599319458, "learning_rate": 9.074487739334901e-05, "loss": 1.1353, "step": 11230 }, { "epoch": 0.94, "grad_norm": 0.9819357991218567, "learning_rate": 9.073647967752771e-05, "loss": 1.1558, "step": 11240 }, { "epoch": 0.94, "grad_norm": 0.8615383505821228, "learning_rate": 9.072808196170643e-05, "loss": 1.1697, "step": 11250 }, { "epoch": 0.94, "grad_norm": 0.8920129537582397, "learning_rate": 9.071968424588513e-05, "loss": 1.1719, "step": 11260 }, { "epoch": 0.94, "grad_norm": 0.90557461977005, "learning_rate": 9.071128653006383e-05, "loss": 1.1732, "step": 11270 }, { "epoch": 0.95, "grad_norm": 0.8542124032974243, "learning_rate": 9.070288881424253e-05, "loss": 1.1391, "step": 11280 }, { "epoch": 0.95, "grad_norm": 0.9274539947509766, "learning_rate": 9.069449109842123e-05, "loss": 1.1668, "step": 11290 }, { "epoch": 0.95, "grad_norm": 0.9204723238945007, "learning_rate": 9.068609338259993e-05, "loss": 1.1517, "step": 11300 }, { "epoch": 0.95, "grad_norm": 0.9253469705581665, "learning_rate": 9.067769566677864e-05, 
"loss": 1.152, "step": 11310 }, { "epoch": 0.95, "grad_norm": 0.8748138546943665, "learning_rate": 9.066929795095735e-05, "loss": 1.1208, "step": 11320 }, { "epoch": 0.95, "grad_norm": 0.9187504053115845, "learning_rate": 9.066090023513605e-05, "loss": 1.1434, "step": 11330 }, { "epoch": 0.95, "grad_norm": 0.8502510786056519, "learning_rate": 9.065250251931475e-05, "loss": 1.1481, "step": 11340 }, { "epoch": 0.95, "grad_norm": 0.9351586103439331, "learning_rate": 9.064410480349346e-05, "loss": 1.1405, "step": 11350 }, { "epoch": 0.95, "grad_norm": 0.9439583420753479, "learning_rate": 9.063570708767216e-05, "loss": 1.1625, "step": 11360 }, { "epoch": 0.95, "grad_norm": 0.9154985547065735, "learning_rate": 9.062730937185086e-05, "loss": 1.1505, "step": 11370 }, { "epoch": 0.95, "grad_norm": 0.9449728727340698, "learning_rate": 9.061891165602956e-05, "loss": 1.1566, "step": 11380 }, { "epoch": 0.95, "grad_norm": 0.8563029766082764, "learning_rate": 9.061051394020826e-05, "loss": 1.1659, "step": 11390 }, { "epoch": 0.96, "grad_norm": 0.8563398122787476, "learning_rate": 9.060211622438696e-05, "loss": 1.1266, "step": 11400 }, { "epoch": 0.96, "grad_norm": 0.9950500726699829, "learning_rate": 9.059371850856568e-05, "loss": 1.1569, "step": 11410 }, { "epoch": 0.96, "grad_norm": 0.8899383544921875, "learning_rate": 9.058532079274438e-05, "loss": 1.1406, "step": 11420 }, { "epoch": 0.96, "grad_norm": 0.9327157139778137, "learning_rate": 9.057692307692308e-05, "loss": 1.132, "step": 11430 }, { "epoch": 0.96, "grad_norm": 0.8532604575157166, "learning_rate": 9.056852536110178e-05, "loss": 1.1256, "step": 11440 }, { "epoch": 0.96, "grad_norm": 1.0322303771972656, "learning_rate": 9.05601276452805e-05, "loss": 1.1338, "step": 11450 }, { "epoch": 0.96, "grad_norm": 0.937974214553833, "learning_rate": 9.05517299294592e-05, "loss": 1.1593, "step": 11460 }, { "epoch": 0.96, "grad_norm": 0.9321600198745728, "learning_rate": 9.05433322136379e-05, "loss": 1.1329, "step": 11470 }, { 
"epoch": 0.96, "grad_norm": 0.9697784781455994, "learning_rate": 9.05349344978166e-05, "loss": 1.1496, "step": 11480 }, { "epoch": 0.96, "grad_norm": 0.9312222599983215, "learning_rate": 9.05265367819953e-05, "loss": 1.1511, "step": 11490 }, { "epoch": 0.96, "grad_norm": 0.9349722266197205, "learning_rate": 9.0518139066174e-05, "loss": 1.1425, "step": 11500 }, { "epoch": 0.96, "grad_norm": 0.9298446774482727, "learning_rate": 9.050974135035271e-05, "loss": 1.1334, "step": 11510 }, { "epoch": 0.97, "grad_norm": 0.9233946800231934, "learning_rate": 9.050134363453141e-05, "loss": 1.145, "step": 11520 }, { "epoch": 0.97, "grad_norm": 0.8692603707313538, "learning_rate": 9.049294591871011e-05, "loss": 1.1444, "step": 11530 }, { "epoch": 0.97, "grad_norm": 0.9367170333862305, "learning_rate": 9.048454820288881e-05, "loss": 1.1709, "step": 11540 }, { "epoch": 0.97, "grad_norm": 0.8840388059616089, "learning_rate": 9.047615048706753e-05, "loss": 1.1488, "step": 11550 }, { "epoch": 0.97, "grad_norm": 0.8870083093643188, "learning_rate": 9.046775277124623e-05, "loss": 1.1619, "step": 11560 }, { "epoch": 0.97, "grad_norm": 0.9165491461753845, "learning_rate": 9.045935505542493e-05, "loss": 1.1678, "step": 11570 }, { "epoch": 0.97, "grad_norm": 0.9082239270210266, "learning_rate": 9.045095733960363e-05, "loss": 1.1483, "step": 11580 }, { "epoch": 0.97, "grad_norm": 0.8463948965072632, "learning_rate": 9.044255962378233e-05, "loss": 1.1622, "step": 11590 }, { "epoch": 0.97, "grad_norm": 0.9144827127456665, "learning_rate": 9.043416190796103e-05, "loss": 1.1706, "step": 11600 }, { "epoch": 0.97, "grad_norm": 0.887029230594635, "learning_rate": 9.042576419213973e-05, "loss": 1.1497, "step": 11610 }, { "epoch": 0.97, "grad_norm": 0.8472746014595032, "learning_rate": 9.041736647631845e-05, "loss": 1.1532, "step": 11620 }, { "epoch": 0.97, "grad_norm": 0.9237121939659119, "learning_rate": 9.040896876049715e-05, "loss": 1.1652, "step": 11630 }, { "epoch": 0.98, "grad_norm": 
0.9166734218597412, "learning_rate": 9.040057104467585e-05, "loss": 1.1908, "step": 11640 }, { "epoch": 0.98, "grad_norm": 0.907728910446167, "learning_rate": 9.039217332885456e-05, "loss": 1.1566, "step": 11650 }, { "epoch": 0.98, "grad_norm": 0.887100875377655, "learning_rate": 9.038377561303326e-05, "loss": 1.1541, "step": 11660 }, { "epoch": 0.98, "grad_norm": 0.8894091844558716, "learning_rate": 9.037537789721196e-05, "loss": 1.1457, "step": 11670 }, { "epoch": 0.98, "grad_norm": 0.9175511002540588, "learning_rate": 9.036698018139066e-05, "loss": 1.1317, "step": 11680 }, { "epoch": 0.98, "grad_norm": 0.8548650145530701, "learning_rate": 9.035858246556938e-05, "loss": 1.1816, "step": 11690 }, { "epoch": 0.98, "grad_norm": 0.9049464464187622, "learning_rate": 9.035018474974807e-05, "loss": 1.1413, "step": 11700 }, { "epoch": 0.98, "grad_norm": 0.8453555703163147, "learning_rate": 9.034178703392677e-05, "loss": 1.1587, "step": 11710 }, { "epoch": 0.98, "grad_norm": 0.9028091430664062, "learning_rate": 9.033338931810548e-05, "loss": 1.1363, "step": 11720 }, { "epoch": 0.98, "grad_norm": 0.8624139428138733, "learning_rate": 9.032499160228418e-05, "loss": 1.1436, "step": 11730 }, { "epoch": 0.98, "grad_norm": 0.9154136180877686, "learning_rate": 9.031659388646288e-05, "loss": 1.1431, "step": 11740 }, { "epoch": 0.99, "grad_norm": 0.9075929522514343, "learning_rate": 9.03081961706416e-05, "loss": 1.1415, "step": 11750 }, { "epoch": 0.99, "grad_norm": 0.8844103217124939, "learning_rate": 9.02997984548203e-05, "loss": 1.1394, "step": 11760 }, { "epoch": 0.99, "grad_norm": 0.8745536208152771, "learning_rate": 9.0291400738999e-05, "loss": 1.1666, "step": 11770 }, { "epoch": 0.99, "grad_norm": 0.8478753566741943, "learning_rate": 9.02830030231777e-05, "loss": 1.1605, "step": 11780 }, { "epoch": 0.99, "grad_norm": 0.9206047654151917, "learning_rate": 9.027460530735641e-05, "loss": 1.1279, "step": 11790 }, { "epoch": 0.99, "grad_norm": 0.8796776533126831, "learning_rate": 
9.02662075915351e-05, "loss": 1.1264, "step": 11800 }, { "epoch": 0.99, "grad_norm": 0.8907874226570129, "learning_rate": 9.02578098757138e-05, "loss": 1.1498, "step": 11810 }, { "epoch": 0.99, "grad_norm": 0.9629140496253967, "learning_rate": 9.024941215989252e-05, "loss": 1.151, "step": 11820 }, { "epoch": 0.99, "grad_norm": 0.897369384765625, "learning_rate": 9.024101444407122e-05, "loss": 1.1438, "step": 11830 }, { "epoch": 0.99, "grad_norm": 0.8689546585083008, "learning_rate": 9.023261672824992e-05, "loss": 1.1573, "step": 11840 }, { "epoch": 0.99, "grad_norm": 0.8658522963523865, "learning_rate": 9.022421901242862e-05, "loss": 1.1485, "step": 11850 }, { "epoch": 0.99, "grad_norm": 0.9156214594841003, "learning_rate": 9.021582129660733e-05, "loss": 1.1285, "step": 11860 }, { "epoch": 1.0, "grad_norm": 0.8841491937637329, "learning_rate": 9.020742358078603e-05, "loss": 1.1536, "step": 11870 }, { "epoch": 1.0, "grad_norm": 0.9126939177513123, "learning_rate": 9.019902586496473e-05, "loss": 1.1572, "step": 11880 }, { "epoch": 1.0, "grad_norm": 0.9111533164978027, "learning_rate": 9.019062814914345e-05, "loss": 1.164, "step": 11890 }, { "epoch": 1.0, "grad_norm": 0.8516184687614441, "learning_rate": 9.018223043332215e-05, "loss": 1.1695, "step": 11900 }, { "epoch": 1.0, "grad_norm": 0.9268118739128113, "learning_rate": 9.017383271750083e-05, "loss": 1.1369, "step": 11910 }, { "epoch": 1.0, "grad_norm": 0.8939505219459534, "learning_rate": 9.016543500167955e-05, "loss": 1.1642, "step": 11920 }, { "epoch": 1.0, "eval_loss": 1.3688740730285645, "eval_runtime": 6239.3798, "eval_samples_per_second": 266.106, "eval_steps_per_second": 4.158, "step": 11928 }, { "epoch": 1.0, "grad_norm": 0.9582164287567139, "learning_rate": 9.015703728585825e-05, "loss": 1.1547, "step": 11930 }, { "epoch": 1.0, "grad_norm": 0.8907540440559387, "learning_rate": 9.014863957003695e-05, "loss": 1.1259, "step": 11940 }, { "epoch": 1.0, "grad_norm": 1.1118344068527222, "learning_rate": 
9.014024185421565e-05, "loss": 1.1284, "step": 11950 }, { "epoch": 1.0, "grad_norm": 0.9015932679176331, "learning_rate": 9.013184413839437e-05, "loss": 1.1224, "step": 11960 }, { "epoch": 1.0, "grad_norm": 0.8888578414916992, "learning_rate": 9.012344642257307e-05, "loss": 1.1172, "step": 11970 }, { "epoch": 1.0, "grad_norm": 0.8929213881492615, "learning_rate": 9.011504870675177e-05, "loss": 1.116, "step": 11980 }, { "epoch": 1.01, "grad_norm": 0.9146283864974976, "learning_rate": 9.010665099093048e-05, "loss": 1.1452, "step": 11990 }, { "epoch": 1.01, "grad_norm": 0.9056942462921143, "learning_rate": 9.009825327510918e-05, "loss": 1.1497, "step": 12000 }, { "epoch": 1.01, "grad_norm": 0.9360379576683044, "learning_rate": 9.008985555928788e-05, "loss": 1.1482, "step": 12010 }, { "epoch": 1.01, "grad_norm": 0.9058497548103333, "learning_rate": 9.008145784346658e-05, "loss": 1.1476, "step": 12020 }, { "epoch": 1.01, "grad_norm": 0.9097636938095093, "learning_rate": 9.007306012764528e-05, "loss": 1.1122, "step": 12030 }, { "epoch": 1.01, "grad_norm": 0.9412002563476562, "learning_rate": 9.006466241182398e-05, "loss": 1.1507, "step": 12040 }, { "epoch": 1.01, "grad_norm": 0.9335237741470337, "learning_rate": 9.005626469600269e-05, "loss": 1.138, "step": 12050 }, { "epoch": 1.01, "grad_norm": 0.8122248649597168, "learning_rate": 9.00478669801814e-05, "loss": 1.1395, "step": 12060 }, { "epoch": 1.01, "grad_norm": 0.9185567498207092, "learning_rate": 9.00394692643601e-05, "loss": 1.1466, "step": 12070 }, { "epoch": 1.01, "grad_norm": 0.8701349496841431, "learning_rate": 9.00310715485388e-05, "loss": 1.1274, "step": 12080 }, { "epoch": 1.01, "grad_norm": 0.9429726600646973, "learning_rate": 9.00226738327175e-05, "loss": 1.1621, "step": 12090 }, { "epoch": 1.01, "grad_norm": 0.8825283646583557, "learning_rate": 9.001427611689622e-05, "loss": 1.1305, "step": 12100 }, { "epoch": 1.02, "grad_norm": 0.938089907169342, "learning_rate": 9.000587840107492e-05, "loss": 1.1252, 
"step": 12110 }, { "epoch": 1.02, "grad_norm": 0.8835190534591675, "learning_rate": 8.99974806852536e-05, "loss": 1.1597, "step": 12120 }, { "epoch": 1.02, "grad_norm": 0.9665217399597168, "learning_rate": 8.998908296943232e-05, "loss": 1.1147, "step": 12130 }, { "epoch": 1.02, "grad_norm": 0.9050577282905579, "learning_rate": 8.998068525361102e-05, "loss": 1.1258, "step": 12140 }, { "epoch": 1.02, "grad_norm": 0.9124248623847961, "learning_rate": 8.997228753778972e-05, "loss": 1.1242, "step": 12150 }, { "epoch": 1.02, "grad_norm": 0.9285717010498047, "learning_rate": 8.996388982196843e-05, "loss": 1.1339, "step": 12160 }, { "epoch": 1.02, "grad_norm": 0.9225600361824036, "learning_rate": 8.995549210614713e-05, "loss": 1.1437, "step": 12170 }, { "epoch": 1.02, "grad_norm": 0.9179040193557739, "learning_rate": 8.994709439032583e-05, "loss": 1.142, "step": 12180 }, { "epoch": 1.02, "grad_norm": 0.9169156551361084, "learning_rate": 8.993869667450454e-05, "loss": 1.1602, "step": 12190 }, { "epoch": 1.02, "grad_norm": 0.8557838201522827, "learning_rate": 8.993029895868325e-05, "loss": 1.1229, "step": 12200 }, { "epoch": 1.02, "grad_norm": 0.9075002670288086, "learning_rate": 8.992190124286195e-05, "loss": 1.133, "step": 12210 }, { "epoch": 1.02, "grad_norm": 0.8852888345718384, "learning_rate": 8.991350352704065e-05, "loss": 1.1424, "step": 12220 }, { "epoch": 1.03, "grad_norm": 0.884982168674469, "learning_rate": 8.990510581121935e-05, "loss": 1.1372, "step": 12230 }, { "epoch": 1.03, "grad_norm": 0.8553844690322876, "learning_rate": 8.989670809539805e-05, "loss": 1.142, "step": 12240 }, { "epoch": 1.03, "grad_norm": 0.9492677450180054, "learning_rate": 8.988831037957675e-05, "loss": 1.1527, "step": 12250 }, { "epoch": 1.03, "grad_norm": 0.9053412079811096, "learning_rate": 8.987991266375547e-05, "loss": 1.1275, "step": 12260 }, { "epoch": 1.03, "grad_norm": 0.9139066338539124, "learning_rate": 8.987151494793417e-05, "loss": 1.1351, "step": 12270 }, { "epoch": 1.03, 
"grad_norm": 0.9355369210243225, "learning_rate": 8.986311723211287e-05, "loss": 1.1199, "step": 12280 }, { "epoch": 1.03, "grad_norm": 0.9116610884666443, "learning_rate": 8.985471951629157e-05, "loss": 1.1324, "step": 12290 }, { "epoch": 1.03, "grad_norm": 0.8947935700416565, "learning_rate": 8.984632180047028e-05, "loss": 1.1375, "step": 12300 }, { "epoch": 1.03, "grad_norm": 0.8590942025184631, "learning_rate": 8.983792408464898e-05, "loss": 1.1369, "step": 12310 }, { "epoch": 1.03, "grad_norm": 1.006734848022461, "learning_rate": 8.982952636882769e-05, "loss": 1.131, "step": 12320 }, { "epoch": 1.03, "grad_norm": 0.9852463006973267, "learning_rate": 8.982112865300639e-05, "loss": 1.1401, "step": 12330 }, { "epoch": 1.03, "grad_norm": 0.9508471488952637, "learning_rate": 8.981273093718509e-05, "loss": 1.1185, "step": 12340 }, { "epoch": 1.04, "grad_norm": 0.9054518342018127, "learning_rate": 8.980433322136379e-05, "loss": 1.1347, "step": 12350 }, { "epoch": 1.04, "grad_norm": 0.9307007193565369, "learning_rate": 8.979593550554249e-05, "loss": 1.1169, "step": 12360 }, { "epoch": 1.04, "grad_norm": 0.9499795436859131, "learning_rate": 8.97875377897212e-05, "loss": 1.1481, "step": 12370 }, { "epoch": 1.04, "grad_norm": 0.9338065385818481, "learning_rate": 8.97791400738999e-05, "loss": 1.1127, "step": 12380 }, { "epoch": 1.04, "grad_norm": 0.8858123421669006, "learning_rate": 8.97707423580786e-05, "loss": 1.1279, "step": 12390 }, { "epoch": 1.04, "grad_norm": 0.9713876843452454, "learning_rate": 8.976234464225732e-05, "loss": 1.152, "step": 12400 }, { "epoch": 1.04, "grad_norm": 0.9448899030685425, "learning_rate": 8.975394692643602e-05, "loss": 1.116, "step": 12410 }, { "epoch": 1.04, "grad_norm": 0.9086986780166626, "learning_rate": 8.974554921061472e-05, "loss": 1.1483, "step": 12420 }, { "epoch": 1.04, "grad_norm": 0.8648169040679932, "learning_rate": 8.973715149479342e-05, "loss": 1.1272, "step": 12430 }, { "epoch": 1.04, "grad_norm": 1.0005784034729004, 
"learning_rate": 8.972875377897212e-05, "loss": 1.1388, "step": 12440 }, { "epoch": 1.04, "grad_norm": 0.9356294870376587, "learning_rate": 8.972035606315082e-05, "loss": 1.1399, "step": 12450 }, { "epoch": 1.04, "grad_norm": 0.8659923672676086, "learning_rate": 8.971195834732952e-05, "loss": 1.1269, "step": 12460 }, { "epoch": 1.05, "grad_norm": 0.8799879550933838, "learning_rate": 8.970356063150824e-05, "loss": 1.1384, "step": 12470 }, { "epoch": 1.05, "grad_norm": 0.9933121800422668, "learning_rate": 8.969516291568694e-05, "loss": 1.1437, "step": 12480 }, { "epoch": 1.05, "grad_norm": 0.9219959378242493, "learning_rate": 8.968676519986564e-05, "loss": 1.1333, "step": 12490 }, { "epoch": 1.05, "grad_norm": 0.8931273818016052, "learning_rate": 8.967836748404435e-05, "loss": 1.1088, "step": 12500 }, { "epoch": 1.05, "grad_norm": 0.9934206604957581, "learning_rate": 8.966996976822305e-05, "loss": 1.1439, "step": 12510 }, { "epoch": 1.05, "grad_norm": 0.9685350656509399, "learning_rate": 8.966157205240175e-05, "loss": 1.1238, "step": 12520 }, { "epoch": 1.05, "grad_norm": 0.9198681116104126, "learning_rate": 8.965317433658045e-05, "loss": 1.1155, "step": 12530 }, { "epoch": 1.05, "grad_norm": 0.9107829928398132, "learning_rate": 8.964477662075917e-05, "loss": 1.1306, "step": 12540 }, { "epoch": 1.05, "grad_norm": 0.8961829543113708, "learning_rate": 8.963637890493786e-05, "loss": 1.1327, "step": 12550 }, { "epoch": 1.05, "grad_norm": 0.8820117712020874, "learning_rate": 8.962798118911656e-05, "loss": 1.1368, "step": 12560 }, { "epoch": 1.05, "grad_norm": 0.9899251461029053, "learning_rate": 8.961958347329527e-05, "loss": 1.1217, "step": 12570 }, { "epoch": 1.05, "grad_norm": 0.8744221329689026, "learning_rate": 8.961118575747397e-05, "loss": 1.1484, "step": 12580 }, { "epoch": 1.06, "grad_norm": 0.8801760077476501, "learning_rate": 8.960278804165267e-05, "loss": 1.1265, "step": 12590 }, { "epoch": 1.06, "grad_norm": 0.888437807559967, "learning_rate": 
8.959439032583137e-05, "loss": 1.1398, "step": 12600 }, { "epoch": 1.06, "grad_norm": 0.9583972096443176, "learning_rate": 8.958599261001009e-05, "loss": 1.1228, "step": 12610 }, { "epoch": 1.06, "grad_norm": 0.8810545206069946, "learning_rate": 8.957759489418879e-05, "loss": 1.1467, "step": 12620 }, { "epoch": 1.06, "grad_norm": 0.8706270456314087, "learning_rate": 8.956919717836749e-05, "loss": 1.1321, "step": 12630 }, { "epoch": 1.06, "grad_norm": 0.9265187382698059, "learning_rate": 8.95607994625462e-05, "loss": 1.1348, "step": 12640 }, { "epoch": 1.06, "grad_norm": 0.8875660300254822, "learning_rate": 8.955240174672489e-05, "loss": 1.132, "step": 12650 }, { "epoch": 1.06, "grad_norm": 0.9585201740264893, "learning_rate": 8.954400403090359e-05, "loss": 1.1255, "step": 12660 }, { "epoch": 1.06, "grad_norm": 0.9146342277526855, "learning_rate": 8.95356063150823e-05, "loss": 1.0998, "step": 12670 }, { "epoch": 1.06, "grad_norm": 0.9078376293182373, "learning_rate": 8.9527208599261e-05, "loss": 1.1111, "step": 12680 }, { "epoch": 1.06, "grad_norm": 0.8908431529998779, "learning_rate": 8.95188108834397e-05, "loss": 1.1231, "step": 12690 }, { "epoch": 1.06, "grad_norm": 0.9033951163291931, "learning_rate": 8.95104131676184e-05, "loss": 1.1354, "step": 12700 }, { "epoch": 1.07, "grad_norm": 0.8694300055503845, "learning_rate": 8.950201545179712e-05, "loss": 1.1149, "step": 12710 }, { "epoch": 1.07, "grad_norm": 0.9056485295295715, "learning_rate": 8.949361773597582e-05, "loss": 1.1206, "step": 12720 }, { "epoch": 1.07, "grad_norm": 0.8865606188774109, "learning_rate": 8.948522002015452e-05, "loss": 1.1173, "step": 12730 }, { "epoch": 1.07, "grad_norm": 0.8753979206085205, "learning_rate": 8.947682230433324e-05, "loss": 1.1304, "step": 12740 }, { "epoch": 1.07, "grad_norm": 0.9080657362937927, "learning_rate": 8.946842458851194e-05, "loss": 1.1218, "step": 12750 }, { "epoch": 1.07, "grad_norm": 0.9731104373931885, "learning_rate": 8.946002687269062e-05, "loss": 1.1478, 
"step": 12760 }, { "epoch": 1.07, "grad_norm": 0.9440938830375671, "learning_rate": 8.945162915686934e-05, "loss": 1.1138, "step": 12770 }, { "epoch": 1.07, "grad_norm": 0.8991868495941162, "learning_rate": 8.944323144104804e-05, "loss": 1.1316, "step": 12780 }, { "epoch": 1.07, "grad_norm": 0.9543919563293457, "learning_rate": 8.943483372522674e-05, "loss": 1.1335, "step": 12790 }, { "epoch": 1.07, "grad_norm": 0.9015852808952332, "learning_rate": 8.942643600940544e-05, "loss": 1.1245, "step": 12800 }, { "epoch": 1.07, "grad_norm": 0.979584813117981, "learning_rate": 8.941803829358415e-05, "loss": 1.1523, "step": 12810 }, { "epoch": 1.07, "grad_norm": 0.9773133993148804, "learning_rate": 8.940964057776286e-05, "loss": 1.1287, "step": 12820 }, { "epoch": 1.08, "grad_norm": 0.9675416946411133, "learning_rate": 8.940124286194156e-05, "loss": 1.1153, "step": 12830 }, { "epoch": 1.08, "grad_norm": 0.9855402708053589, "learning_rate": 8.939284514612026e-05, "loss": 1.14, "step": 12840 }, { "epoch": 1.08, "grad_norm": 0.8832024335861206, "learning_rate": 8.938444743029897e-05, "loss": 1.1252, "step": 12850 }, { "epoch": 1.08, "grad_norm": 0.9848988056182861, "learning_rate": 8.937604971447766e-05, "loss": 1.1218, "step": 12860 }, { "epoch": 1.08, "grad_norm": 0.9306350350379944, "learning_rate": 8.936765199865636e-05, "loss": 1.1239, "step": 12870 }, { "epoch": 1.08, "grad_norm": 0.9815962910652161, "learning_rate": 8.935925428283507e-05, "loss": 1.1228, "step": 12880 }, { "epoch": 1.08, "grad_norm": 0.9946402907371521, "learning_rate": 8.935085656701377e-05, "loss": 1.1246, "step": 12890 }, { "epoch": 1.08, "grad_norm": 0.8948855996131897, "learning_rate": 8.934245885119247e-05, "loss": 1.1028, "step": 12900 }, { "epoch": 1.08, "grad_norm": 0.9454535245895386, "learning_rate": 8.933406113537119e-05, "loss": 1.1268, "step": 12910 }, { "epoch": 1.08, "grad_norm": 1.086484670639038, "learning_rate": 8.932566341954989e-05, "loss": 1.1287, "step": 12920 }, { "epoch": 1.08, 
"grad_norm": 0.9772274494171143, "learning_rate": 8.931726570372859e-05, "loss": 1.1373, "step": 12930 }, { "epoch": 1.08, "grad_norm": 0.9615238308906555, "learning_rate": 8.930886798790729e-05, "loss": 1.1153, "step": 12940 }, { "epoch": 1.09, "grad_norm": 0.8882322311401367, "learning_rate": 8.9300470272086e-05, "loss": 1.1171, "step": 12950 }, { "epoch": 1.09, "grad_norm": 0.9522902965545654, "learning_rate": 8.92920725562647e-05, "loss": 1.1239, "step": 12960 }, { "epoch": 1.09, "grad_norm": 0.9135017991065979, "learning_rate": 8.928367484044339e-05, "loss": 1.1188, "step": 12970 }, { "epoch": 1.09, "grad_norm": 0.9306880235671997, "learning_rate": 8.927527712462211e-05, "loss": 1.123, "step": 12980 }, { "epoch": 1.09, "grad_norm": 0.9436923861503601, "learning_rate": 8.926687940880081e-05, "loss": 1.1585, "step": 12990 }, { "epoch": 1.09, "grad_norm": 0.9140714406967163, "learning_rate": 8.925848169297951e-05, "loss": 1.1146, "step": 13000 }, { "epoch": 1.09, "grad_norm": 0.9659335613250732, "learning_rate": 8.925008397715822e-05, "loss": 1.1426, "step": 13010 }, { "epoch": 1.09, "grad_norm": 0.8587934374809265, "learning_rate": 8.924168626133692e-05, "loss": 1.1476, "step": 13020 }, { "epoch": 1.09, "grad_norm": 0.8232706189155579, "learning_rate": 8.923328854551562e-05, "loss": 1.1218, "step": 13030 }, { "epoch": 1.09, "grad_norm": 0.9183794856071472, "learning_rate": 8.922489082969432e-05, "loss": 1.1379, "step": 13040 }, { "epoch": 1.09, "grad_norm": 0.9124522805213928, "learning_rate": 8.921649311387304e-05, "loss": 1.1068, "step": 13050 }, { "epoch": 1.09, "grad_norm": 0.9751561284065247, "learning_rate": 8.920809539805174e-05, "loss": 1.1305, "step": 13060 }, { "epoch": 1.1, "grad_norm": 0.8878384232521057, "learning_rate": 8.919969768223044e-05, "loss": 1.1397, "step": 13070 }, { "epoch": 1.1, "grad_norm": 0.9667701125144958, "learning_rate": 8.919129996640914e-05, "loss": 1.1258, "step": 13080 }, { "epoch": 1.1, "grad_norm": 0.9521215558052063, 
"learning_rate": 8.918290225058784e-05, "loss": 1.1199, "step": 13090 }, { "epoch": 1.1, "grad_norm": 0.9559984803199768, "learning_rate": 8.917450453476654e-05, "loss": 1.1239, "step": 13100 }, { "epoch": 1.1, "grad_norm": 0.9714217782020569, "learning_rate": 8.916610681894526e-05, "loss": 1.1227, "step": 13110 }, { "epoch": 1.1, "grad_norm": 0.9680772423744202, "learning_rate": 8.915770910312396e-05, "loss": 1.1287, "step": 13120 }, { "epoch": 1.1, "grad_norm": 0.8694576621055603, "learning_rate": 8.914931138730266e-05, "loss": 1.1511, "step": 13130 }, { "epoch": 1.1, "grad_norm": 0.9465891718864441, "learning_rate": 8.914091367148136e-05, "loss": 1.1347, "step": 13140 }, { "epoch": 1.1, "grad_norm": 0.9178768396377563, "learning_rate": 8.913251595566007e-05, "loss": 1.1349, "step": 13150 }, { "epoch": 1.1, "grad_norm": 0.9185075759887695, "learning_rate": 8.91249580114209e-05, "loss": 1.1106, "step": 13160 }, { "epoch": 1.1, "grad_norm": 0.8623015880584717, "learning_rate": 8.911656029559961e-05, "loss": 1.1217, "step": 13170 }, { "epoch": 1.1, "grad_norm": 0.9602338671684265, "learning_rate": 8.910816257977831e-05, "loss": 1.1426, "step": 13180 }, { "epoch": 1.11, "grad_norm": 0.9467712640762329, "learning_rate": 8.9099764863957e-05, "loss": 1.1318, "step": 13190 }, { "epoch": 1.11, "grad_norm": 1.0699985027313232, "learning_rate": 8.909136714813571e-05, "loss": 1.1224, "step": 13200 }, { "epoch": 1.11, "grad_norm": 0.9185535311698914, "learning_rate": 8.908296943231441e-05, "loss": 1.1364, "step": 13210 }, { "epoch": 1.11, "grad_norm": 0.8876036405563354, "learning_rate": 8.907457171649311e-05, "loss": 1.1465, "step": 13220 }, { "epoch": 1.11, "grad_norm": 0.9188990592956543, "learning_rate": 8.906617400067183e-05, "loss": 1.1087, "step": 13230 }, { "epoch": 1.11, "grad_norm": 0.8698427677154541, "learning_rate": 8.905777628485053e-05, "loss": 1.123, "step": 13240 }, { "epoch": 1.11, "grad_norm": 0.9527921080589294, "learning_rate": 8.904937856902923e-05, 
"loss": 1.1522, "step": 13250 }, { "epoch": 1.11, "grad_norm": 0.9535458087921143, "learning_rate": 8.904098085320793e-05, "loss": 1.1181, "step": 13260 }, { "epoch": 1.11, "grad_norm": 0.9558416604995728, "learning_rate": 8.903258313738664e-05, "loss": 1.1384, "step": 13270 }, { "epoch": 1.11, "grad_norm": 0.9938929677009583, "learning_rate": 8.902418542156534e-05, "loss": 1.1259, "step": 13280 }, { "epoch": 1.11, "grad_norm": 0.8923820853233337, "learning_rate": 8.901578770574404e-05, "loss": 1.1163, "step": 13290 }, { "epoch": 1.11, "grad_norm": 0.8947280049324036, "learning_rate": 8.900738998992274e-05, "loss": 1.1203, "step": 13300 }, { "epoch": 1.12, "grad_norm": 0.9547944664955139, "learning_rate": 8.899899227410144e-05, "loss": 1.1546, "step": 13310 }, { "epoch": 1.12, "grad_norm": 0.9443290829658508, "learning_rate": 8.899059455828015e-05, "loss": 1.1091, "step": 13320 }, { "epoch": 1.12, "grad_norm": 0.8698960542678833, "learning_rate": 8.898219684245886e-05, "loss": 1.1139, "step": 13330 }, { "epoch": 1.12, "grad_norm": 0.9820073843002319, "learning_rate": 8.897379912663756e-05, "loss": 1.0837, "step": 13340 }, { "epoch": 1.12, "grad_norm": 0.9299082159996033, "learning_rate": 8.896540141081626e-05, "loss": 1.1372, "step": 13350 }, { "epoch": 1.12, "grad_norm": 0.929469108581543, "learning_rate": 8.895700369499496e-05, "loss": 1.146, "step": 13360 }, { "epoch": 1.12, "grad_norm": 0.9092618823051453, "learning_rate": 8.894860597917368e-05, "loss": 1.1207, "step": 13370 }, { "epoch": 1.12, "grad_norm": 0.9252662062644958, "learning_rate": 8.894020826335238e-05, "loss": 1.1167, "step": 13380 }, { "epoch": 1.12, "grad_norm": 0.8673176169395447, "learning_rate": 8.893181054753108e-05, "loss": 1.1265, "step": 13390 }, { "epoch": 1.12, "grad_norm": 0.8885186910629272, "learning_rate": 8.892341283170978e-05, "loss": 1.142, "step": 13400 }, { "epoch": 1.12, "grad_norm": 0.9046466946601868, "learning_rate": 8.891501511588848e-05, "loss": 1.1231, "step": 13410 }, { 
"epoch": 1.13, "grad_norm": 0.954371452331543, "learning_rate": 8.890661740006718e-05, "loss": 1.1062, "step": 13420 }, { "epoch": 1.13, "grad_norm": 0.9183551669120789, "learning_rate": 8.889821968424588e-05, "loss": 1.1086, "step": 13430 }, { "epoch": 1.13, "grad_norm": 0.9714826941490173, "learning_rate": 8.88898219684246e-05, "loss": 1.1209, "step": 13440 }, { "epoch": 1.13, "grad_norm": 0.8576006293296814, "learning_rate": 8.88814242526033e-05, "loss": 1.1001, "step": 13450 }, { "epoch": 1.13, "grad_norm": 0.8900761008262634, "learning_rate": 8.8873026536782e-05, "loss": 1.1248, "step": 13460 }, { "epoch": 1.13, "grad_norm": 0.8779008984565735, "learning_rate": 8.886462882096071e-05, "loss": 1.105, "step": 13470 }, { "epoch": 1.13, "grad_norm": 0.8928524851799011, "learning_rate": 8.885623110513941e-05, "loss": 1.1053, "step": 13480 }, { "epoch": 1.13, "grad_norm": 0.8737199902534485, "learning_rate": 8.884783338931811e-05, "loss": 1.1408, "step": 13490 }, { "epoch": 1.13, "grad_norm": 0.951984703540802, "learning_rate": 8.883943567349681e-05, "loss": 1.1152, "step": 13500 }, { "epoch": 1.13, "grad_norm": 0.9574511647224426, "learning_rate": 8.883103795767551e-05, "loss": 1.1118, "step": 13510 }, { "epoch": 1.13, "grad_norm": 0.9811375737190247, "learning_rate": 8.882264024185421e-05, "loss": 1.159, "step": 13520 }, { "epoch": 1.13, "grad_norm": 0.923875093460083, "learning_rate": 8.881424252603291e-05, "loss": 1.1295, "step": 13530 }, { "epoch": 1.14, "grad_norm": 0.9695706367492676, "learning_rate": 8.880584481021163e-05, "loss": 1.132, "step": 13540 }, { "epoch": 1.14, "grad_norm": 1.0641175508499146, "learning_rate": 8.879744709439033e-05, "loss": 1.1273, "step": 13550 }, { "epoch": 1.14, "grad_norm": 0.870869517326355, "learning_rate": 8.878904937856903e-05, "loss": 1.1101, "step": 13560 }, { "epoch": 1.14, "grad_norm": 0.937880277633667, "learning_rate": 8.878065166274774e-05, "loss": 1.0993, "step": 13570 }, { "epoch": 1.14, "grad_norm": 
0.8831961154937744, "learning_rate": 8.877225394692644e-05, "loss": 1.1077, "step": 13580 }, { "epoch": 1.14, "grad_norm": 0.8785178065299988, "learning_rate": 8.876385623110515e-05, "loss": 1.1198, "step": 13590 }, { "epoch": 1.14, "grad_norm": 0.9101758599281311, "learning_rate": 8.875545851528385e-05, "loss": 1.1316, "step": 13600 }, { "epoch": 1.14, "grad_norm": 0.9287114143371582, "learning_rate": 8.874706079946256e-05, "loss": 1.0877, "step": 13610 }, { "epoch": 1.14, "grad_norm": 0.9042177796363831, "learning_rate": 8.873866308364125e-05, "loss": 1.1398, "step": 13620 }, { "epoch": 1.14, "grad_norm": 0.9239535331726074, "learning_rate": 8.873026536781995e-05, "loss": 1.137, "step": 13630 }, { "epoch": 1.14, "grad_norm": 0.9701600670814514, "learning_rate": 8.872186765199866e-05, "loss": 1.1118, "step": 13640 }, { "epoch": 1.14, "grad_norm": 0.9426175951957703, "learning_rate": 8.871346993617736e-05, "loss": 1.1129, "step": 13650 }, { "epoch": 1.15, "grad_norm": 0.8625684380531311, "learning_rate": 8.870507222035606e-05, "loss": 1.1198, "step": 13660 }, { "epoch": 1.15, "grad_norm": 0.9156553149223328, "learning_rate": 8.869667450453476e-05, "loss": 1.0969, "step": 13670 }, { "epoch": 1.15, "grad_norm": 0.9108346700668335, "learning_rate": 8.868827678871348e-05, "loss": 1.0853, "step": 13680 }, { "epoch": 1.15, "grad_norm": 0.9348630309104919, "learning_rate": 8.867987907289218e-05, "loss": 1.1061, "step": 13690 }, { "epoch": 1.15, "grad_norm": 0.9263437390327454, "learning_rate": 8.867148135707088e-05, "loss": 1.0909, "step": 13700 }, { "epoch": 1.15, "grad_norm": 0.9103613495826721, "learning_rate": 8.86630836412496e-05, "loss": 1.1466, "step": 13710 }, { "epoch": 1.15, "grad_norm": 0.8632253408432007, "learning_rate": 8.865468592542828e-05, "loss": 1.1456, "step": 13720 }, { "epoch": 1.15, "grad_norm": 0.9509271383285522, "learning_rate": 8.864628820960698e-05, "loss": 1.1197, "step": 13730 }, { "epoch": 1.15, "grad_norm": 0.9113433957099915, 
"learning_rate": 8.86378904937857e-05, "loss": 1.114, "step": 13740 }, { "epoch": 1.15, "grad_norm": 0.8760579824447632, "learning_rate": 8.86294927779644e-05, "loss": 1.1264, "step": 13750 }, { "epoch": 1.15, "grad_norm": 0.8918175101280212, "learning_rate": 8.86210950621431e-05, "loss": 1.1495, "step": 13760 }, { "epoch": 1.15, "grad_norm": 0.8605629205703735, "learning_rate": 8.86126973463218e-05, "loss": 1.1184, "step": 13770 }, { "epoch": 1.16, "grad_norm": 0.9115935564041138, "learning_rate": 8.860429963050051e-05, "loss": 1.1158, "step": 13780 }, { "epoch": 1.16, "grad_norm": 0.8542095422744751, "learning_rate": 8.859590191467921e-05, "loss": 1.0958, "step": 13790 }, { "epoch": 1.16, "grad_norm": 0.9421138763427734, "learning_rate": 8.858750419885791e-05, "loss": 1.1353, "step": 13800 }, { "epoch": 1.16, "grad_norm": 0.9435168504714966, "learning_rate": 8.857910648303663e-05, "loss": 1.1387, "step": 13810 }, { "epoch": 1.16, "grad_norm": 0.8814151883125305, "learning_rate": 8.857070876721533e-05, "loss": 1.1259, "step": 13820 }, { "epoch": 1.16, "grad_norm": 0.9127605557441711, "learning_rate": 8.856231105139402e-05, "loss": 1.1021, "step": 13830 }, { "epoch": 1.16, "grad_norm": 0.9513577222824097, "learning_rate": 8.855391333557273e-05, "loss": 1.1498, "step": 13840 }, { "epoch": 1.16, "grad_norm": 1.018366813659668, "learning_rate": 8.854551561975143e-05, "loss": 1.1185, "step": 13850 }, { "epoch": 1.16, "grad_norm": 1.0120519399642944, "learning_rate": 8.853711790393013e-05, "loss": 1.1099, "step": 13860 }, { "epoch": 1.16, "grad_norm": 0.9408965110778809, "learning_rate": 8.852872018810883e-05, "loss": 1.13, "step": 13870 }, { "epoch": 1.16, "grad_norm": 0.8699688911437988, "learning_rate": 8.852032247228755e-05, "loss": 1.1464, "step": 13880 }, { "epoch": 1.16, "grad_norm": 0.9382355809211731, "learning_rate": 8.851192475646625e-05, "loss": 1.126, "step": 13890 }, { "epoch": 1.17, "grad_norm": 0.9635114669799805, "learning_rate": 8.850352704064495e-05, 
"loss": 1.1163, "step": 13900 }, { "epoch": 1.17, "grad_norm": 0.9410046935081482, "learning_rate": 8.849512932482365e-05, "loss": 1.1236, "step": 13910 }, { "epoch": 1.17, "grad_norm": 0.9217486381530762, "learning_rate": 8.848673160900236e-05, "loss": 1.1144, "step": 13920 }, { "epoch": 1.17, "grad_norm": 0.874560534954071, "learning_rate": 8.847833389318105e-05, "loss": 1.1266, "step": 13930 }, { "epoch": 1.17, "grad_norm": 0.8844665884971619, "learning_rate": 8.846993617735976e-05, "loss": 1.1049, "step": 13940 }, { "epoch": 1.17, "grad_norm": 0.9224509000778198, "learning_rate": 8.846153846153847e-05, "loss": 1.1361, "step": 13950 }, { "epoch": 1.17, "grad_norm": 0.9551849961280823, "learning_rate": 8.845314074571717e-05, "loss": 1.1304, "step": 13960 }, { "epoch": 1.17, "grad_norm": 0.9024940729141235, "learning_rate": 8.844474302989587e-05, "loss": 1.0848, "step": 13970 }, { "epoch": 1.17, "grad_norm": 0.9649220108985901, "learning_rate": 8.843634531407458e-05, "loss": 1.1057, "step": 13980 }, { "epoch": 1.17, "grad_norm": 0.920486330986023, "learning_rate": 8.842794759825328e-05, "loss": 1.1071, "step": 13990 }, { "epoch": 1.17, "grad_norm": 0.8891380429267883, "learning_rate": 8.841954988243198e-05, "loss": 1.11, "step": 14000 }, { "epoch": 1.17, "grad_norm": 0.9146546125411987, "learning_rate": 8.841115216661068e-05, "loss": 1.1002, "step": 14010 }, { "epoch": 1.18, "grad_norm": 0.9723424911499023, "learning_rate": 8.84027544507894e-05, "loss": 1.1125, "step": 14020 }, { "epoch": 1.18, "grad_norm": 0.9875556826591492, "learning_rate": 8.83943567349681e-05, "loss": 1.1088, "step": 14030 }, { "epoch": 1.18, "grad_norm": 0.9426760673522949, "learning_rate": 8.838595901914679e-05, "loss": 1.1558, "step": 14040 }, { "epoch": 1.18, "grad_norm": 0.9075933694839478, "learning_rate": 8.83775613033255e-05, "loss": 1.0947, "step": 14050 }, { "epoch": 1.18, "grad_norm": 0.8498238921165466, "learning_rate": 8.83691635875042e-05, "loss": 1.121, "step": 14060 }, { 
"epoch": 1.18, "grad_norm": 0.9222276210784912, "learning_rate": 8.83607658716829e-05, "loss": 1.1325, "step": 14070 }, { "epoch": 1.18, "grad_norm": 0.8715246319770813, "learning_rate": 8.835236815586161e-05, "loss": 1.1274, "step": 14080 }, { "epoch": 1.18, "grad_norm": 0.8748525381088257, "learning_rate": 8.834397044004032e-05, "loss": 1.1333, "step": 14090 }, { "epoch": 1.18, "grad_norm": 0.8961807489395142, "learning_rate": 8.833557272421902e-05, "loss": 1.1172, "step": 14100 }, { "epoch": 1.18, "grad_norm": 0.9251819252967834, "learning_rate": 8.832717500839772e-05, "loss": 1.0832, "step": 14110 }, { "epoch": 1.18, "grad_norm": 0.8879318833351135, "learning_rate": 8.831877729257643e-05, "loss": 1.1097, "step": 14120 }, { "epoch": 1.18, "grad_norm": 0.9097147583961487, "learning_rate": 8.831037957675513e-05, "loss": 1.1073, "step": 14130 }, { "epoch": 1.19, "grad_norm": 0.9001836776733398, "learning_rate": 8.830198186093383e-05, "loss": 1.1066, "step": 14140 }, { "epoch": 1.19, "grad_norm": 0.8732591867446899, "learning_rate": 8.829358414511253e-05, "loss": 1.1208, "step": 14150 }, { "epoch": 1.19, "grad_norm": 0.8660206198692322, "learning_rate": 8.828518642929123e-05, "loss": 1.1289, "step": 14160 }, { "epoch": 1.19, "grad_norm": 0.8846068978309631, "learning_rate": 8.827678871346993e-05, "loss": 1.1122, "step": 14170 }, { "epoch": 1.19, "grad_norm": 0.9000621438026428, "learning_rate": 8.826839099764865e-05, "loss": 1.1296, "step": 14180 }, { "epoch": 1.19, "grad_norm": 0.9542943239212036, "learning_rate": 8.825999328182735e-05, "loss": 1.1128, "step": 14190 }, { "epoch": 1.19, "grad_norm": 0.982465386390686, "learning_rate": 8.825159556600605e-05, "loss": 1.1104, "step": 14200 }, { "epoch": 1.19, "grad_norm": 0.8780309557914734, "learning_rate": 8.824319785018475e-05, "loss": 1.1371, "step": 14210 }, { "epoch": 1.19, "grad_norm": 0.9983294606208801, "learning_rate": 8.823480013436347e-05, "loss": 1.1467, "step": 14220 }, { "epoch": 1.19, "grad_norm": 
0.9325399398803711, "learning_rate": 8.822640241854217e-05, "loss": 1.1222, "step": 14230 }, { "epoch": 1.19, "grad_norm": 0.9618887901306152, "learning_rate": 8.821800470272087e-05, "loss": 1.1368, "step": 14240 }, { "epoch": 1.19, "grad_norm": 0.9248318076133728, "learning_rate": 8.820960698689957e-05, "loss": 1.117, "step": 14250 }, { "epoch": 1.2, "grad_norm": 0.9239673614501953, "learning_rate": 8.820120927107827e-05, "loss": 1.1189, "step": 14260 }, { "epoch": 1.2, "grad_norm": 0.9307476878166199, "learning_rate": 8.819281155525697e-05, "loss": 1.1231, "step": 14270 }, { "epoch": 1.2, "grad_norm": 0.920782208442688, "learning_rate": 8.818441383943567e-05, "loss": 1.1253, "step": 14280 }, { "epoch": 1.2, "grad_norm": 0.90324467420578, "learning_rate": 8.817601612361438e-05, "loss": 1.1256, "step": 14290 }, { "epoch": 1.2, "grad_norm": 0.9153021574020386, "learning_rate": 8.816761840779308e-05, "loss": 1.1112, "step": 14300 }, { "epoch": 1.2, "grad_norm": 0.9251894354820251, "learning_rate": 8.815922069197178e-05, "loss": 1.1134, "step": 14310 }, { "epoch": 1.2, "grad_norm": 0.9242687821388245, "learning_rate": 8.81508229761505e-05, "loss": 1.109, "step": 14320 }, { "epoch": 1.2, "grad_norm": 1.0389292240142822, "learning_rate": 8.81424252603292e-05, "loss": 1.1053, "step": 14330 }, { "epoch": 1.2, "grad_norm": 0.8664111495018005, "learning_rate": 8.81340275445079e-05, "loss": 1.1138, "step": 14340 }, { "epoch": 1.2, "grad_norm": 0.934990644454956, "learning_rate": 8.81256298286866e-05, "loss": 1.1246, "step": 14350 }, { "epoch": 1.2, "grad_norm": 0.9289098978042603, "learning_rate": 8.81172321128653e-05, "loss": 1.1209, "step": 14360 }, { "epoch": 1.2, "grad_norm": 0.9563125371932983, "learning_rate": 8.8108834397044e-05, "loss": 1.131, "step": 14370 }, { "epoch": 1.21, "grad_norm": 0.9024096727371216, "learning_rate": 8.81004366812227e-05, "loss": 1.1249, "step": 14380 }, { "epoch": 1.21, "grad_norm": 0.93302983045578, "learning_rate": 8.809203896540142e-05, 
"loss": 1.1093, "step": 14390 }, { "epoch": 1.21, "grad_norm": 0.9166001677513123, "learning_rate": 8.808364124958012e-05, "loss": 1.1162, "step": 14400 }, { "epoch": 1.21, "grad_norm": 1.015295147895813, "learning_rate": 8.807524353375882e-05, "loss": 1.123, "step": 14410 }, { "epoch": 1.21, "grad_norm": 0.9155243039131165, "learning_rate": 8.806684581793753e-05, "loss": 1.1173, "step": 14420 }, { "epoch": 1.21, "grad_norm": 0.9881671071052551, "learning_rate": 8.805844810211623e-05, "loss": 1.1129, "step": 14430 }, { "epoch": 1.21, "grad_norm": 0.9244381189346313, "learning_rate": 8.805005038629493e-05, "loss": 1.1483, "step": 14440 }, { "epoch": 1.21, "grad_norm": 0.8698108792304993, "learning_rate": 8.804165267047364e-05, "loss": 1.1186, "step": 14450 }, { "epoch": 1.21, "grad_norm": 0.9278691411018372, "learning_rate": 8.803325495465234e-05, "loss": 1.1311, "step": 14460 }, { "epoch": 1.21, "grad_norm": 0.9126485586166382, "learning_rate": 8.802485723883104e-05, "loss": 1.1314, "step": 14470 }, { "epoch": 1.21, "grad_norm": 0.9807928204536438, "learning_rate": 8.801645952300974e-05, "loss": 1.1336, "step": 14480 }, { "epoch": 1.21, "grad_norm": 1.0050467252731323, "learning_rate": 8.800806180718845e-05, "loss": 1.1156, "step": 14490 }, { "epoch": 1.22, "grad_norm": 0.8595203757286072, "learning_rate": 8.799966409136715e-05, "loss": 1.1032, "step": 14500 }, { "epoch": 1.22, "grad_norm": 0.9575943350791931, "learning_rate": 8.799126637554585e-05, "loss": 1.1299, "step": 14510 }, { "epoch": 1.22, "grad_norm": 0.9149112105369568, "learning_rate": 8.798286865972455e-05, "loss": 1.1207, "step": 14520 }, { "epoch": 1.22, "grad_norm": 0.9616824388504028, "learning_rate": 8.797447094390327e-05, "loss": 1.1279, "step": 14530 }, { "epoch": 1.22, "grad_norm": 0.9843555092811584, "learning_rate": 8.796607322808197e-05, "loss": 1.1254, "step": 14540 }, { "epoch": 1.22, "grad_norm": 0.9238868951797485, "learning_rate": 8.795767551226067e-05, "loss": 1.1162, "step": 14550 }, 
{ "epoch": 1.22, "grad_norm": 0.9549159407615662, "learning_rate": 8.794927779643938e-05, "loss": 1.1256, "step": 14560 }, { "epoch": 1.22, "grad_norm": 0.9398096799850464, "learning_rate": 8.794088008061807e-05, "loss": 1.1107, "step": 14570 }, { "epoch": 1.22, "grad_norm": 0.9384233951568604, "learning_rate": 8.793248236479677e-05, "loss": 1.1136, "step": 14580 }, { "epoch": 1.22, "grad_norm": 0.9121764898300171, "learning_rate": 8.792408464897549e-05, "loss": 1.1168, "step": 14590 }, { "epoch": 1.22, "grad_norm": 0.9835638999938965, "learning_rate": 8.791568693315419e-05, "loss": 1.1335, "step": 14600 }, { "epoch": 1.22, "grad_norm": 1.090331792831421, "learning_rate": 8.790728921733289e-05, "loss": 1.085, "step": 14610 }, { "epoch": 1.23, "grad_norm": 0.9265365600585938, "learning_rate": 8.789889150151159e-05, "loss": 1.0982, "step": 14620 }, { "epoch": 1.23, "grad_norm": 0.8829748034477234, "learning_rate": 8.78904937856903e-05, "loss": 1.1142, "step": 14630 }, { "epoch": 1.23, "grad_norm": 0.9203078746795654, "learning_rate": 8.7882096069869e-05, "loss": 1.1174, "step": 14640 }, { "epoch": 1.23, "grad_norm": 0.954437255859375, "learning_rate": 8.78736983540477e-05, "loss": 1.1255, "step": 14650 }, { "epoch": 1.23, "grad_norm": 0.9201489090919495, "learning_rate": 8.786530063822642e-05, "loss": 1.1278, "step": 14660 }, { "epoch": 1.23, "grad_norm": 0.9098379015922546, "learning_rate": 8.785690292240512e-05, "loss": 1.1045, "step": 14670 }, { "epoch": 1.23, "grad_norm": 0.9808036088943481, "learning_rate": 8.78485052065838e-05, "loss": 1.1123, "step": 14680 }, { "epoch": 1.23, "grad_norm": 0.9970177412033081, "learning_rate": 8.784010749076252e-05, "loss": 1.1244, "step": 14690 }, { "epoch": 1.23, "grad_norm": 0.9139359593391418, "learning_rate": 8.783170977494122e-05, "loss": 1.1054, "step": 14700 }, { "epoch": 1.23, "grad_norm": 0.9273451566696167, "learning_rate": 8.782331205911992e-05, "loss": 1.0995, "step": 14710 }, { "epoch": 1.23, "grad_norm": 
0.91801518201828, "learning_rate": 8.781491434329862e-05, "loss": 1.1142, "step": 14720 }, { "epoch": 1.23, "grad_norm": 0.9866279363632202, "learning_rate": 8.780651662747734e-05, "loss": 1.1147, "step": 14730 }, { "epoch": 1.24, "grad_norm": 0.9117400646209717, "learning_rate": 8.779811891165604e-05, "loss": 1.1107, "step": 14740 }, { "epoch": 1.24, "grad_norm": 0.9210174083709717, "learning_rate": 8.778972119583474e-05, "loss": 1.1188, "step": 14750 }, { "epoch": 1.24, "grad_norm": 0.9606226086616516, "learning_rate": 8.778132348001344e-05, "loss": 1.1036, "step": 14760 }, { "epoch": 1.24, "grad_norm": 0.9500154852867126, "learning_rate": 8.777292576419215e-05, "loss": 1.0973, "step": 14770 }, { "epoch": 1.24, "grad_norm": 0.9500105381011963, "learning_rate": 8.776452804837084e-05, "loss": 1.0983, "step": 14780 }, { "epoch": 1.24, "grad_norm": 0.8843684792518616, "learning_rate": 8.775613033254954e-05, "loss": 1.106, "step": 14790 }, { "epoch": 1.24, "grad_norm": 0.9123238921165466, "learning_rate": 8.774773261672825e-05, "loss": 1.1233, "step": 14800 }, { "epoch": 1.24, "grad_norm": 0.9631816744804382, "learning_rate": 8.773933490090695e-05, "loss": 1.1282, "step": 14810 }, { "epoch": 1.24, "grad_norm": 0.9595421552658081, "learning_rate": 8.773093718508566e-05, "loss": 1.1034, "step": 14820 }, { "epoch": 1.24, "grad_norm": 1.0419737100601196, "learning_rate": 8.772253946926437e-05, "loss": 1.1325, "step": 14830 }, { "epoch": 1.24, "grad_norm": 0.9546859860420227, "learning_rate": 8.771414175344307e-05, "loss": 1.1095, "step": 14840 }, { "epoch": 1.24, "grad_norm": 0.9662930965423584, "learning_rate": 8.770574403762177e-05, "loss": 1.1339, "step": 14850 }, { "epoch": 1.25, "grad_norm": 0.9654275178909302, "learning_rate": 8.769734632180047e-05, "loss": 1.114, "step": 14860 }, { "epoch": 1.25, "grad_norm": 1.0001049041748047, "learning_rate": 8.768894860597919e-05, "loss": 1.1116, "step": 14870 }, { "epoch": 1.25, "grad_norm": 0.9047713279724121, 
"learning_rate": 8.768055089015789e-05, "loss": 1.1283, "step": 14880 }, { "epoch": 1.25, "grad_norm": 0.8955701589584351, "learning_rate": 8.767215317433657e-05, "loss": 1.115, "step": 14890 }, { "epoch": 1.25, "grad_norm": 0.9277672171592712, "learning_rate": 8.766375545851529e-05, "loss": 1.0798, "step": 14900 }, { "epoch": 1.25, "grad_norm": 0.8995895981788635, "learning_rate": 8.765535774269399e-05, "loss": 1.0962, "step": 14910 }, { "epoch": 1.25, "grad_norm": 0.9734140038490295, "learning_rate": 8.764696002687269e-05, "loss": 1.1177, "step": 14920 }, { "epoch": 1.25, "grad_norm": 1.007621169090271, "learning_rate": 8.76385623110514e-05, "loss": 1.0963, "step": 14930 }, { "epoch": 1.25, "grad_norm": 0.9346214532852173, "learning_rate": 8.76301645952301e-05, "loss": 1.1207, "step": 14940 }, { "epoch": 1.25, "grad_norm": 0.8714445233345032, "learning_rate": 8.76217668794088e-05, "loss": 1.1019, "step": 14950 }, { "epoch": 1.25, "grad_norm": 0.9773920774459839, "learning_rate": 8.76133691635875e-05, "loss": 1.1345, "step": 14960 }, { "epoch": 1.25, "grad_norm": 0.962600588798523, "learning_rate": 8.760497144776622e-05, "loss": 1.1099, "step": 14970 }, { "epoch": 1.26, "grad_norm": 0.9157037138938904, "learning_rate": 8.759657373194492e-05, "loss": 1.1178, "step": 14980 }, { "epoch": 1.26, "grad_norm": 0.9327508211135864, "learning_rate": 8.758817601612361e-05, "loss": 1.1125, "step": 14990 }, { "epoch": 1.26, "grad_norm": 0.9027820825576782, "learning_rate": 8.757977830030232e-05, "loss": 1.1151, "step": 15000 }, { "epoch": 1.26, "grad_norm": 0.9330666065216064, "learning_rate": 8.757138058448102e-05, "loss": 1.1033, "step": 15010 }, { "epoch": 1.26, "grad_norm": 0.9602764844894409, "learning_rate": 8.756298286865972e-05, "loss": 1.1159, "step": 15020 }, { "epoch": 1.26, "grad_norm": 0.9144401550292969, "learning_rate": 8.755458515283842e-05, "loss": 1.1232, "step": 15030 }, { "epoch": 1.26, "grad_norm": 0.928410530090332, "learning_rate": 8.754618743701714e-05, 
"loss": 1.1226, "step": 15040 }, { "epoch": 1.26, "grad_norm": 0.9314349293708801, "learning_rate": 8.753778972119584e-05, "loss": 1.1214, "step": 15050 }, { "epoch": 1.26, "grad_norm": 0.908319890499115, "learning_rate": 8.752939200537454e-05, "loss": 1.122, "step": 15060 }, { "epoch": 1.26, "grad_norm": 0.936560869216919, "learning_rate": 8.752099428955325e-05, "loss": 1.1281, "step": 15070 }, { "epoch": 1.26, "grad_norm": 1.0261152982711792, "learning_rate": 8.751259657373195e-05, "loss": 1.1326, "step": 15080 }, { "epoch": 1.27, "grad_norm": 0.9165915846824646, "learning_rate": 8.750419885791066e-05, "loss": 1.115, "step": 15090 }, { "epoch": 1.27, "grad_norm": 0.9344234466552734, "learning_rate": 8.749580114208936e-05, "loss": 1.1175, "step": 15100 }, { "epoch": 1.27, "grad_norm": 0.8795295357704163, "learning_rate": 8.748740342626806e-05, "loss": 1.1173, "step": 15110 }, { "epoch": 1.27, "grad_norm": 0.9428615570068359, "learning_rate": 8.747900571044676e-05, "loss": 1.1058, "step": 15120 }, { "epoch": 1.27, "grad_norm": 0.9324769377708435, "learning_rate": 8.747060799462546e-05, "loss": 1.1184, "step": 15130 }, { "epoch": 1.27, "grad_norm": 0.8626567125320435, "learning_rate": 8.746221027880417e-05, "loss": 1.0974, "step": 15140 }, { "epoch": 1.27, "grad_norm": 0.9009515643119812, "learning_rate": 8.745381256298287e-05, "loss": 1.1054, "step": 15150 }, { "epoch": 1.27, "grad_norm": 0.9502636194229126, "learning_rate": 8.744541484716157e-05, "loss": 1.1219, "step": 15160 }, { "epoch": 1.27, "grad_norm": 0.9786444902420044, "learning_rate": 8.743701713134029e-05, "loss": 1.1044, "step": 15170 }, { "epoch": 1.27, "grad_norm": 0.9187127351760864, "learning_rate": 8.742861941551899e-05, "loss": 1.1318, "step": 15180 }, { "epoch": 1.27, "grad_norm": 0.924630880355835, "learning_rate": 8.742022169969769e-05, "loss": 1.1403, "step": 15190 }, { "epoch": 1.27, "grad_norm": 0.9214105010032654, "learning_rate": 8.741182398387639e-05, "loss": 1.1046, "step": 15200 }, { 
"epoch": 1.28, "grad_norm": 0.9642462134361267, "learning_rate": 8.740342626805509e-05, "loss": 1.1336, "step": 15210 }, { "epoch": 1.28, "grad_norm": 0.9122458696365356, "learning_rate": 8.739502855223379e-05, "loss": 1.1193, "step": 15220 }, { "epoch": 1.28, "grad_norm": 0.9484336972236633, "learning_rate": 8.738663083641249e-05, "loss": 1.1147, "step": 15230 }, { "epoch": 1.28, "grad_norm": 0.9583941102027893, "learning_rate": 8.73782331205912e-05, "loss": 1.1261, "step": 15240 }, { "epoch": 1.28, "grad_norm": 0.9006879329681396, "learning_rate": 8.736983540476991e-05, "loss": 1.1185, "step": 15250 }, { "epoch": 1.28, "grad_norm": 0.9025834202766418, "learning_rate": 8.736143768894861e-05, "loss": 1.1307, "step": 15260 }, { "epoch": 1.28, "grad_norm": 0.9103566408157349, "learning_rate": 8.735303997312731e-05, "loss": 1.1085, "step": 15270 }, { "epoch": 1.28, "grad_norm": 0.9725592732429504, "learning_rate": 8.734464225730602e-05, "loss": 1.1219, "step": 15280 }, { "epoch": 1.28, "grad_norm": 0.9697563648223877, "learning_rate": 8.733624454148472e-05, "loss": 1.1152, "step": 15290 }, { "epoch": 1.28, "grad_norm": 0.8665038347244263, "learning_rate": 8.732784682566342e-05, "loss": 1.1247, "step": 15300 }, { "epoch": 1.28, "grad_norm": 0.8986729383468628, "learning_rate": 8.731944910984212e-05, "loss": 1.1243, "step": 15310 }, { "epoch": 1.28, "grad_norm": 0.9079999923706055, "learning_rate": 8.731105139402083e-05, "loss": 1.1162, "step": 15320 }, { "epoch": 1.29, "grad_norm": 0.9079917669296265, "learning_rate": 8.730265367819953e-05, "loss": 1.1044, "step": 15330 }, { "epoch": 1.29, "grad_norm": 0.9908376336097717, "learning_rate": 8.729425596237824e-05, "loss": 1.1112, "step": 15340 }, { "epoch": 1.29, "grad_norm": 0.8806149363517761, "learning_rate": 8.728585824655694e-05, "loss": 1.112, "step": 15350 }, { "epoch": 1.29, "grad_norm": 0.9354682564735413, "learning_rate": 8.727746053073564e-05, "loss": 1.1388, "step": 15360 }, { "epoch": 1.29, "grad_norm": 
1.0563576221466064, "learning_rate": 8.726906281491434e-05, "loss": 1.1027, "step": 15370 }, { "epoch": 1.29, "grad_norm": 0.9495493173599243, "learning_rate": 8.726066509909306e-05, "loss": 1.131, "step": 15380 }, { "epoch": 1.29, "grad_norm": 0.8986271619796753, "learning_rate": 8.725226738327176e-05, "loss": 1.114, "step": 15390 }, { "epoch": 1.29, "grad_norm": 0.924021303653717, "learning_rate": 8.724386966745046e-05, "loss": 1.093, "step": 15400 }, { "epoch": 1.29, "grad_norm": 0.9179587364196777, "learning_rate": 8.723547195162917e-05, "loss": 1.1203, "step": 15410 }, { "epoch": 1.29, "grad_norm": 0.975875198841095, "learning_rate": 8.722707423580786e-05, "loss": 1.1177, "step": 15420 }, { "epoch": 1.29, "grad_norm": 0.9055646061897278, "learning_rate": 8.721867651998656e-05, "loss": 1.1213, "step": 15430 }, { "epoch": 1.29, "grad_norm": 0.9086652994155884, "learning_rate": 8.721027880416527e-05, "loss": 1.1207, "step": 15440 }, { "epoch": 1.3, "grad_norm": 0.9314917325973511, "learning_rate": 8.720188108834398e-05, "loss": 1.0941, "step": 15450 }, { "epoch": 1.3, "grad_norm": 0.9791564345359802, "learning_rate": 8.719348337252268e-05, "loss": 1.1331, "step": 15460 }, { "epoch": 1.3, "grad_norm": 0.8971198797225952, "learning_rate": 8.718508565670138e-05, "loss": 1.1099, "step": 15470 }, { "epoch": 1.3, "grad_norm": 0.8518558740615845, "learning_rate": 8.717668794088009e-05, "loss": 1.103, "step": 15480 }, { "epoch": 1.3, "grad_norm": 0.9296644926071167, "learning_rate": 8.716829022505879e-05, "loss": 1.1074, "step": 15490 }, { "epoch": 1.3, "grad_norm": 0.972202479839325, "learning_rate": 8.715989250923749e-05, "loss": 1.1089, "step": 15500 }, { "epoch": 1.3, "grad_norm": 0.9175410866737366, "learning_rate": 8.715149479341619e-05, "loss": 1.1179, "step": 15510 }, { "epoch": 1.3, "grad_norm": 0.8872547149658203, "learning_rate": 8.71430970775949e-05, "loss": 1.1153, "step": 15520 }, { "epoch": 1.3, "grad_norm": 0.9035467505455017, "learning_rate": 
8.71346993617736e-05, "loss": 1.1164, "step": 15530 }, { "epoch": 1.3, "grad_norm": 0.9036070108413696, "learning_rate": 8.712630164595231e-05, "loss": 1.103, "step": 15540 }, { "epoch": 1.3, "grad_norm": 0.9524517059326172, "learning_rate": 8.711790393013101e-05, "loss": 1.1093, "step": 15550 }, { "epoch": 1.3, "grad_norm": 0.8619802594184875, "learning_rate": 8.710950621430971e-05, "loss": 1.1252, "step": 15560 }, { "epoch": 1.31, "grad_norm": 0.9018606543540955, "learning_rate": 8.710110849848841e-05, "loss": 1.1197, "step": 15570 }, { "epoch": 1.31, "grad_norm": 0.9069834351539612, "learning_rate": 8.709271078266712e-05, "loss": 1.1097, "step": 15580 }, { "epoch": 1.31, "grad_norm": 0.8745734691619873, "learning_rate": 8.708431306684583e-05, "loss": 1.1051, "step": 15590 }, { "epoch": 1.31, "grad_norm": 1.0042290687561035, "learning_rate": 8.707591535102453e-05, "loss": 1.1062, "step": 15600 }, { "epoch": 1.31, "grad_norm": 0.9338233470916748, "learning_rate": 8.706751763520323e-05, "loss": 1.1223, "step": 15610 }, { "epoch": 1.31, "grad_norm": 0.8764388561248779, "learning_rate": 8.705911991938194e-05, "loss": 1.1312, "step": 15620 }, { "epoch": 1.31, "grad_norm": 0.9034644365310669, "learning_rate": 8.705072220356063e-05, "loss": 1.1199, "step": 15630 }, { "epoch": 1.31, "grad_norm": 0.9459267854690552, "learning_rate": 8.704232448773933e-05, "loss": 1.1005, "step": 15640 }, { "epoch": 1.31, "grad_norm": 0.9199139475822449, "learning_rate": 8.703392677191804e-05, "loss": 1.1113, "step": 15650 }, { "epoch": 1.31, "grad_norm": 0.9240623712539673, "learning_rate": 8.702552905609674e-05, "loss": 1.1226, "step": 15660 }, { "epoch": 1.31, "grad_norm": 0.9151225686073303, "learning_rate": 8.701713134027544e-05, "loss": 1.1148, "step": 15670 }, { "epoch": 1.31, "grad_norm": 1.002893328666687, "learning_rate": 8.700873362445416e-05, "loss": 1.0963, "step": 15680 }, { "epoch": 1.32, "grad_norm": 0.9323188662528992, "learning_rate": 8.700033590863286e-05, "loss": 
1.0792, "step": 15690 }, { "epoch": 1.32, "grad_norm": 0.9247946739196777, "learning_rate": 8.699193819281156e-05, "loss": 1.1055, "step": 15700 }, { "epoch": 1.32, "grad_norm": 0.9568535685539246, "learning_rate": 8.698354047699026e-05, "loss": 1.0878, "step": 15710 }, { "epoch": 1.32, "grad_norm": 0.9059244990348816, "learning_rate": 8.697514276116898e-05, "loss": 1.1345, "step": 15720 }, { "epoch": 1.32, "grad_norm": 0.9476732611656189, "learning_rate": 8.696674504534768e-05, "loss": 1.0899, "step": 15730 }, { "epoch": 1.32, "grad_norm": 0.9337188601493835, "learning_rate": 8.695834732952636e-05, "loss": 1.1426, "step": 15740 }, { "epoch": 1.32, "grad_norm": 0.8653301000595093, "learning_rate": 8.694994961370508e-05, "loss": 1.1234, "step": 15750 }, { "epoch": 1.32, "grad_norm": 0.8637192845344543, "learning_rate": 8.694155189788378e-05, "loss": 1.1005, "step": 15760 }, { "epoch": 1.32, "grad_norm": 0.9008951187133789, "learning_rate": 8.693315418206248e-05, "loss": 1.1104, "step": 15770 }, { "epoch": 1.32, "grad_norm": 0.9144231081008911, "learning_rate": 8.692475646624119e-05, "loss": 1.0905, "step": 15780 }, { "epoch": 1.32, "grad_norm": 0.952981173992157, "learning_rate": 8.69163587504199e-05, "loss": 1.1172, "step": 15790 }, { "epoch": 1.32, "grad_norm": 0.938640296459198, "learning_rate": 8.69079610345986e-05, "loss": 1.1205, "step": 15800 }, { "epoch": 1.33, "grad_norm": 0.9345549941062927, "learning_rate": 8.68995633187773e-05, "loss": 1.1082, "step": 15810 }, { "epoch": 1.33, "grad_norm": 0.8561148643493652, "learning_rate": 8.689116560295601e-05, "loss": 1.0963, "step": 15820 }, { "epoch": 1.33, "grad_norm": 0.8445508480072021, "learning_rate": 8.688276788713471e-05, "loss": 1.1132, "step": 15830 }, { "epoch": 1.33, "grad_norm": 0.9664802551269531, "learning_rate": 8.68743701713134e-05, "loss": 1.0823, "step": 15840 }, { "epoch": 1.33, "grad_norm": 0.9483756422996521, "learning_rate": 8.686597245549211e-05, "loss": 1.0965, "step": 15850 }, { "epoch": 
1.33, "grad_norm": 1.0393468141555786, "learning_rate": 8.685757473967081e-05, "loss": 1.0732, "step": 15860 }, { "epoch": 1.33, "grad_norm": 0.9285652041435242, "learning_rate": 8.684917702384951e-05, "loss": 1.1245, "step": 15870 }, { "epoch": 1.33, "grad_norm": 0.907402515411377, "learning_rate": 8.684077930802821e-05, "loss": 1.1197, "step": 15880 }, { "epoch": 1.33, "grad_norm": 0.9397310614585876, "learning_rate": 8.683238159220693e-05, "loss": 1.106, "step": 15890 }, { "epoch": 1.33, "grad_norm": 0.9912298321723938, "learning_rate": 8.682398387638563e-05, "loss": 1.1106, "step": 15900 }, { "epoch": 1.33, "grad_norm": 0.8773438334465027, "learning_rate": 8.681558616056433e-05, "loss": 1.1183, "step": 15910 }, { "epoch": 1.33, "grad_norm": 0.8921714425086975, "learning_rate": 8.680718844474304e-05, "loss": 1.1019, "step": 15920 }, { "epoch": 1.34, "grad_norm": 0.9610801339149475, "learning_rate": 8.679879072892174e-05, "loss": 1.1106, "step": 15930 }, { "epoch": 1.34, "grad_norm": 0.8875612020492554, "learning_rate": 8.679039301310044e-05, "loss": 1.1122, "step": 15940 }, { "epoch": 1.34, "grad_norm": 0.925846517086029, "learning_rate": 8.678199529727915e-05, "loss": 1.1097, "step": 15950 }, { "epoch": 1.34, "grad_norm": 0.9786810278892517, "learning_rate": 8.677359758145785e-05, "loss": 1.1074, "step": 15960 }, { "epoch": 1.34, "grad_norm": 0.9146912693977356, "learning_rate": 8.676519986563655e-05, "loss": 1.1139, "step": 15970 }, { "epoch": 1.34, "grad_norm": 0.8765195608139038, "learning_rate": 8.675680214981525e-05, "loss": 1.1031, "step": 15980 }, { "epoch": 1.34, "grad_norm": 0.9373301267623901, "learning_rate": 8.674840443399396e-05, "loss": 1.1142, "step": 15990 }, { "epoch": 1.34, "grad_norm": 0.9163397550582886, "learning_rate": 8.674000671817266e-05, "loss": 1.1289, "step": 16000 }, { "epoch": 1.34, "grad_norm": 0.9457612633705139, "learning_rate": 8.673160900235136e-05, "loss": 1.1219, "step": 16010 }, { "epoch": 1.34, "grad_norm": 
0.9161149859428406, "learning_rate": 8.672321128653008e-05, "loss": 1.1418, "step": 16020 }, { "epoch": 1.34, "grad_norm": 0.9269108176231384, "learning_rate": 8.671481357070878e-05, "loss": 1.1175, "step": 16030 }, { "epoch": 1.34, "grad_norm": 0.9797216653823853, "learning_rate": 8.670641585488748e-05, "loss": 1.0838, "step": 16040 }, { "epoch": 1.35, "grad_norm": 0.9082381725311279, "learning_rate": 8.669801813906618e-05, "loss": 1.1207, "step": 16050 }, { "epoch": 1.35, "grad_norm": 0.9488618969917297, "learning_rate": 8.668962042324488e-05, "loss": 1.0971, "step": 16060 }, { "epoch": 1.35, "grad_norm": 0.8994138836860657, "learning_rate": 8.668122270742358e-05, "loss": 1.1055, "step": 16070 }, { "epoch": 1.35, "grad_norm": 0.9246254563331604, "learning_rate": 8.667282499160228e-05, "loss": 1.1319, "step": 16080 }, { "epoch": 1.35, "grad_norm": 0.8879727125167847, "learning_rate": 8.6664427275781e-05, "loss": 1.1192, "step": 16090 }, { "epoch": 1.35, "grad_norm": 0.9572038650512695, "learning_rate": 8.66560295599597e-05, "loss": 1.1088, "step": 16100 }, { "epoch": 1.35, "grad_norm": 0.9084638357162476, "learning_rate": 8.66476318441384e-05, "loss": 1.0985, "step": 16110 }, { "epoch": 1.35, "grad_norm": 0.9260880351066589, "learning_rate": 8.66392341283171e-05, "loss": 1.0889, "step": 16120 }, { "epoch": 1.35, "grad_norm": 1.0169014930725098, "learning_rate": 8.663083641249581e-05, "loss": 1.1212, "step": 16130 }, { "epoch": 1.35, "grad_norm": 0.9938318729400635, "learning_rate": 8.662243869667451e-05, "loss": 1.1113, "step": 16140 }, { "epoch": 1.35, "grad_norm": 1.01992928981781, "learning_rate": 8.661404098085321e-05, "loss": 1.1201, "step": 16150 }, { "epoch": 1.35, "grad_norm": 0.9507731199264526, "learning_rate": 8.660564326503191e-05, "loss": 1.0985, "step": 16160 }, { "epoch": 1.36, "grad_norm": 0.938035249710083, "learning_rate": 8.659724554921061e-05, "loss": 1.0891, "step": 16170 }, { "epoch": 1.36, "grad_norm": 0.9544059038162231, "learning_rate": 
8.658884783338932e-05, "loss": 1.1158, "step": 16180 }, { "epoch": 1.36, "grad_norm": 0.9504319429397583, "learning_rate": 8.658045011756803e-05, "loss": 1.1165, "step": 16190 }, { "epoch": 1.36, "grad_norm": 0.9170682430267334, "learning_rate": 8.657205240174673e-05, "loss": 1.1279, "step": 16200 }, { "epoch": 1.36, "grad_norm": 0.878187358379364, "learning_rate": 8.656365468592543e-05, "loss": 1.1164, "step": 16210 }, { "epoch": 1.36, "grad_norm": 1.0140724182128906, "learning_rate": 8.655525697010413e-05, "loss": 1.0925, "step": 16220 }, { "epoch": 1.36, "grad_norm": 0.9395869970321655, "learning_rate": 8.654685925428285e-05, "loss": 1.096, "step": 16230 }, { "epoch": 1.36, "grad_norm": 0.9655680060386658, "learning_rate": 8.653846153846155e-05, "loss": 1.1219, "step": 16240 }, { "epoch": 1.36, "grad_norm": 0.9558068513870239, "learning_rate": 8.653006382264025e-05, "loss": 1.1072, "step": 16250 }, { "epoch": 1.36, "grad_norm": 0.8891133069992065, "learning_rate": 8.652166610681896e-05, "loss": 1.1004, "step": 16260 }, { "epoch": 1.36, "grad_norm": 0.8977444767951965, "learning_rate": 8.651326839099765e-05, "loss": 1.1175, "step": 16270 }, { "epoch": 1.36, "grad_norm": 0.9006922245025635, "learning_rate": 8.650487067517635e-05, "loss": 1.0965, "step": 16280 }, { "epoch": 1.37, "grad_norm": 0.9141904711723328, "learning_rate": 8.649647295935506e-05, "loss": 1.1182, "step": 16290 }, { "epoch": 1.37, "grad_norm": 0.9052436351776123, "learning_rate": 8.648807524353376e-05, "loss": 1.1037, "step": 16300 }, { "epoch": 1.37, "grad_norm": 0.8963584303855896, "learning_rate": 8.647967752771246e-05, "loss": 1.0736, "step": 16310 }, { "epoch": 1.37, "grad_norm": 0.895529568195343, "learning_rate": 8.647127981189117e-05, "loss": 1.1201, "step": 16320 }, { "epoch": 1.37, "grad_norm": 0.8678609728813171, "learning_rate": 8.646288209606988e-05, "loss": 1.1195, "step": 16330 }, { "epoch": 1.37, "grad_norm": 0.9141554832458496, "learning_rate": 8.645448438024858e-05, "loss": 
1.0943, "step": 16340 }, { "epoch": 1.37, "grad_norm": 0.9165868759155273, "learning_rate": 8.644608666442728e-05, "loss": 1.1025, "step": 16350 }, { "epoch": 1.37, "grad_norm": 0.9144678711891174, "learning_rate": 8.643768894860598e-05, "loss": 1.0986, "step": 16360 }, { "epoch": 1.37, "grad_norm": 0.8954872488975525, "learning_rate": 8.642929123278468e-05, "loss": 1.1016, "step": 16370 }, { "epoch": 1.37, "grad_norm": 0.8864966034889221, "learning_rate": 8.642089351696338e-05, "loss": 1.1244, "step": 16380 }, { "epoch": 1.37, "grad_norm": 0.9300432801246643, "learning_rate": 8.641249580114208e-05, "loss": 1.0974, "step": 16390 }, { "epoch": 1.37, "grad_norm": 0.9392675757408142, "learning_rate": 8.64040980853208e-05, "loss": 1.0768, "step": 16400 }, { "epoch": 1.38, "grad_norm": 0.9090225696563721, "learning_rate": 8.63957003694995e-05, "loss": 1.1425, "step": 16410 }, { "epoch": 1.38, "grad_norm": 0.8820337057113647, "learning_rate": 8.63873026536782e-05, "loss": 1.117, "step": 16420 }, { "epoch": 1.38, "grad_norm": 0.9691148400306702, "learning_rate": 8.637890493785691e-05, "loss": 1.1105, "step": 16430 }, { "epoch": 1.38, "grad_norm": 0.9114964008331299, "learning_rate": 8.637050722203561e-05, "loss": 1.1094, "step": 16440 }, { "epoch": 1.38, "grad_norm": 0.9060763716697693, "learning_rate": 8.636210950621432e-05, "loss": 1.1227, "step": 16450 }, { "epoch": 1.38, "grad_norm": 0.9717005491256714, "learning_rate": 8.635371179039302e-05, "loss": 1.1099, "step": 16460 }, { "epoch": 1.38, "grad_norm": 0.926828920841217, "learning_rate": 8.634531407457173e-05, "loss": 1.0955, "step": 16470 }, { "epoch": 1.38, "grad_norm": 0.944095253944397, "learning_rate": 8.633691635875042e-05, "loss": 1.0987, "step": 16480 }, { "epoch": 1.38, "grad_norm": 0.9036884903907776, "learning_rate": 8.632851864292912e-05, "loss": 1.0847, "step": 16490 }, { "epoch": 1.38, "grad_norm": 0.936955451965332, "learning_rate": 8.632012092710783e-05, "loss": 1.0932, "step": 16500 }, { "epoch": 
1.38, "grad_norm": 0.9809815287590027, "learning_rate": 8.631172321128653e-05, "loss": 1.1117, "step": 16510 }, { "epoch": 1.38, "grad_norm": 0.871856689453125, "learning_rate": 8.630332549546523e-05, "loss": 1.0907, "step": 16520 }, { "epoch": 1.39, "grad_norm": 0.9420427680015564, "learning_rate": 8.629492777964395e-05, "loss": 1.1084, "step": 16530 }, { "epoch": 1.39, "grad_norm": 0.9744893312454224, "learning_rate": 8.628653006382265e-05, "loss": 1.1061, "step": 16540 }, { "epoch": 1.39, "grad_norm": 0.9527720808982849, "learning_rate": 8.627813234800135e-05, "loss": 1.102, "step": 16550 }, { "epoch": 1.39, "grad_norm": 0.9135245084762573, "learning_rate": 8.626973463218005e-05, "loss": 1.1257, "step": 16560 }, { "epoch": 1.39, "grad_norm": 0.9639981985092163, "learning_rate": 8.626133691635876e-05, "loss": 1.0942, "step": 16570 }, { "epoch": 1.39, "grad_norm": 0.9305285215377808, "learning_rate": 8.625293920053745e-05, "loss": 1.0832, "step": 16580 }, { "epoch": 1.39, "grad_norm": 0.9467176198959351, "learning_rate": 8.624454148471615e-05, "loss": 1.1014, "step": 16590 }, { "epoch": 1.39, "grad_norm": 0.9170607328414917, "learning_rate": 8.623614376889487e-05, "loss": 1.1042, "step": 16600 }, { "epoch": 1.39, "grad_norm": 0.8990239500999451, "learning_rate": 8.622774605307357e-05, "loss": 1.0938, "step": 16610 }, { "epoch": 1.39, "grad_norm": 0.919737696647644, "learning_rate": 8.621934833725227e-05, "loss": 1.1256, "step": 16620 }, { "epoch": 1.39, "grad_norm": 0.927342414855957, "learning_rate": 8.621095062143097e-05, "loss": 1.0588, "step": 16630 }, { "epoch": 1.39, "grad_norm": 0.9596610069274902, "learning_rate": 8.620255290560968e-05, "loss": 1.1139, "step": 16640 }, { "epoch": 1.4, "grad_norm": 0.9167120456695557, "learning_rate": 8.619415518978838e-05, "loss": 1.1182, "step": 16650 }, { "epoch": 1.4, "grad_norm": 0.9366847276687622, "learning_rate": 8.618575747396708e-05, "loss": 1.1034, "step": 16660 }, { "epoch": 1.4, "grad_norm": 0.9725133180618286, 
"learning_rate": 8.61773597581458e-05, "loss": 1.1178, "step": 16670 }, { "epoch": 1.4, "grad_norm": 0.9172531366348267, "learning_rate": 8.61689620423245e-05, "loss": 1.0879, "step": 16680 }, { "epoch": 1.4, "grad_norm": 0.8928595781326294, "learning_rate": 8.616056432650319e-05, "loss": 1.0983, "step": 16690 }, { "epoch": 1.4, "grad_norm": 0.9851216673851013, "learning_rate": 8.61521666106819e-05, "loss": 1.1089, "step": 16700 }, { "epoch": 1.4, "grad_norm": 0.9329010844230652, "learning_rate": 8.61437688948606e-05, "loss": 1.1232, "step": 16710 }, { "epoch": 1.4, "grad_norm": 0.8544999957084656, "learning_rate": 8.61353711790393e-05, "loss": 1.0947, "step": 16720 }, { "epoch": 1.4, "grad_norm": 0.9257146716117859, "learning_rate": 8.6126973463218e-05, "loss": 1.1086, "step": 16730 }, { "epoch": 1.4, "grad_norm": 0.8957710266113281, "learning_rate": 8.611857574739672e-05, "loss": 1.1104, "step": 16740 }, { "epoch": 1.4, "grad_norm": 0.9224327206611633, "learning_rate": 8.611017803157542e-05, "loss": 1.1093, "step": 16750 }, { "epoch": 1.41, "grad_norm": 0.9331311583518982, "learning_rate": 8.610178031575412e-05, "loss": 1.1207, "step": 16760 }, { "epoch": 1.41, "grad_norm": 0.9614216685295105, "learning_rate": 8.609338259993283e-05, "loss": 1.1143, "step": 16770 }, { "epoch": 1.41, "grad_norm": 0.950002133846283, "learning_rate": 8.608498488411153e-05, "loss": 1.0925, "step": 16780 }, { "epoch": 1.41, "grad_norm": 0.9124014377593994, "learning_rate": 8.607658716829023e-05, "loss": 1.0883, "step": 16790 }, { "epoch": 1.41, "grad_norm": 0.9585002660751343, "learning_rate": 8.606818945246893e-05, "loss": 1.086, "step": 16800 }, { "epoch": 1.41, "grad_norm": 0.9788587093353271, "learning_rate": 8.605979173664763e-05, "loss": 1.0986, "step": 16810 }, { "epoch": 1.41, "grad_norm": 0.9377402067184448, "learning_rate": 8.605139402082634e-05, "loss": 1.0789, "step": 16820 }, { "epoch": 1.41, "grad_norm": 0.9702931642532349, "learning_rate": 8.604299630500504e-05, "loss": 
1.0968, "step": 16830 }, { "epoch": 1.41, "grad_norm": 0.8989161849021912, "learning_rate": 8.603459858918375e-05, "loss": 1.1002, "step": 16840 }, { "epoch": 1.41, "grad_norm": 1.0440198183059692, "learning_rate": 8.602620087336245e-05, "loss": 1.11, "step": 16850 }, { "epoch": 1.41, "grad_norm": 0.9456074833869934, "learning_rate": 8.601780315754115e-05, "loss": 1.1072, "step": 16860 }, { "epoch": 1.41, "grad_norm": 0.880145251750946, "learning_rate": 8.600940544171985e-05, "loss": 1.1017, "step": 16870 }, { "epoch": 1.42, "grad_norm": 0.9445072412490845, "learning_rate": 8.600100772589857e-05, "loss": 1.1128, "step": 16880 }, { "epoch": 1.42, "grad_norm": 0.869269073009491, "learning_rate": 8.599261001007727e-05, "loss": 1.1128, "step": 16890 }, { "epoch": 1.42, "grad_norm": 1.0037399530410767, "learning_rate": 8.598421229425595e-05, "loss": 1.1128, "step": 16900 }, { "epoch": 1.42, "grad_norm": 0.9507962465286255, "learning_rate": 8.597581457843467e-05, "loss": 1.1139, "step": 16910 }, { "epoch": 1.42, "grad_norm": 0.9215889573097229, "learning_rate": 8.596741686261337e-05, "loss": 1.1184, "step": 16920 }, { "epoch": 1.42, "grad_norm": 1.0083012580871582, "learning_rate": 8.595901914679207e-05, "loss": 1.1049, "step": 16930 }, { "epoch": 1.42, "grad_norm": 0.9698511958122253, "learning_rate": 8.595062143097078e-05, "loss": 1.0972, "step": 16940 }, { "epoch": 1.42, "grad_norm": 0.9426410794258118, "learning_rate": 8.594222371514949e-05, "loss": 1.126, "step": 16950 }, { "epoch": 1.42, "grad_norm": 0.8693003058433533, "learning_rate": 8.593382599932819e-05, "loss": 1.1075, "step": 16960 }, { "epoch": 1.42, "grad_norm": 0.9229259490966797, "learning_rate": 8.592542828350689e-05, "loss": 1.1006, "step": 16970 }, { "epoch": 1.42, "grad_norm": 0.9683804512023926, "learning_rate": 8.59170305676856e-05, "loss": 1.1131, "step": 16980 }, { "epoch": 1.42, "grad_norm": 0.9072035551071167, "learning_rate": 8.59086328518643e-05, "loss": 1.1056, "step": 16990 }, { "epoch": 
1.43, "grad_norm": 0.8945325613021851, "learning_rate": 8.5900235136043e-05, "loss": 1.1063, "step": 17000 }, { "epoch": 1.43, "grad_norm": 0.9545913934707642, "learning_rate": 8.58918374202217e-05, "loss": 1.0916, "step": 17010 }, { "epoch": 1.43, "grad_norm": 0.9185695052146912, "learning_rate": 8.58834397044004e-05, "loss": 1.1102, "step": 17020 }, { "epoch": 1.43, "grad_norm": 0.9400401711463928, "learning_rate": 8.58750419885791e-05, "loss": 1.0953, "step": 17030 }, { "epoch": 1.43, "grad_norm": 0.9485997557640076, "learning_rate": 8.586664427275782e-05, "loss": 1.0975, "step": 17040 }, { "epoch": 1.43, "grad_norm": 0.9165788292884827, "learning_rate": 8.585824655693652e-05, "loss": 1.113, "step": 17050 }, { "epoch": 1.43, "grad_norm": 0.9310771822929382, "learning_rate": 8.584984884111522e-05, "loss": 1.1241, "step": 17060 }, { "epoch": 1.43, "grad_norm": 0.8730702996253967, "learning_rate": 8.584145112529392e-05, "loss": 1.1075, "step": 17070 }, { "epoch": 1.43, "grad_norm": 1.0038843154907227, "learning_rate": 8.583305340947263e-05, "loss": 1.0916, "step": 17080 }, { "epoch": 1.43, "grad_norm": 0.9894881844520569, "learning_rate": 8.582465569365134e-05, "loss": 1.1008, "step": 17090 }, { "epoch": 1.43, "grad_norm": 0.9492472410202026, "learning_rate": 8.581625797783004e-05, "loss": 1.1115, "step": 17100 }, { "epoch": 1.43, "grad_norm": 0.9209385514259338, "learning_rate": 8.580786026200874e-05, "loss": 1.0971, "step": 17110 }, { "epoch": 1.44, "grad_norm": 0.9176464676856995, "learning_rate": 8.579946254618744e-05, "loss": 1.0947, "step": 17120 }, { "epoch": 1.44, "grad_norm": 0.956646740436554, "learning_rate": 8.579190460194827e-05, "loss": 1.1075, "step": 17130 }, { "epoch": 1.44, "grad_norm": 0.9240680932998657, "learning_rate": 8.578350688612697e-05, "loss": 1.1004, "step": 17140 }, { "epoch": 1.44, "grad_norm": 0.9465555548667908, "learning_rate": 8.577510917030567e-05, "loss": 1.1027, "step": 17150 }, { "epoch": 1.44, "grad_norm": 0.9345818161964417, 
"learning_rate": 8.576671145448439e-05, "loss": 1.1061, "step": 17160 }, { "epoch": 1.44, "grad_norm": 0.91121906042099, "learning_rate": 8.575831373866309e-05, "loss": 1.1047, "step": 17170 }, { "epoch": 1.44, "grad_norm": 0.9541527032852173, "learning_rate": 8.574991602284179e-05, "loss": 1.1144, "step": 17180 }, { "epoch": 1.44, "grad_norm": 1.001551866531372, "learning_rate": 8.574151830702049e-05, "loss": 1.099, "step": 17190 }, { "epoch": 1.44, "grad_norm": 0.9648950695991516, "learning_rate": 8.57331205911992e-05, "loss": 1.1263, "step": 17200 }, { "epoch": 1.44, "grad_norm": 0.9161174893379211, "learning_rate": 8.57247228753779e-05, "loss": 1.1075, "step": 17210 }, { "epoch": 1.44, "grad_norm": 0.9444253444671631, "learning_rate": 8.57163251595566e-05, "loss": 1.1183, "step": 17220 }, { "epoch": 1.44, "grad_norm": 0.8742684125900269, "learning_rate": 8.57079274437353e-05, "loss": 1.0996, "step": 17230 }, { "epoch": 1.45, "grad_norm": 0.8935016393661499, "learning_rate": 8.569952972791401e-05, "loss": 1.0964, "step": 17240 }, { "epoch": 1.45, "grad_norm": 0.8797896504402161, "learning_rate": 8.569113201209271e-05, "loss": 1.1144, "step": 17250 }, { "epoch": 1.45, "grad_norm": 0.9014408588409424, "learning_rate": 8.568273429627142e-05, "loss": 1.1027, "step": 17260 }, { "epoch": 1.45, "grad_norm": 0.9957795143127441, "learning_rate": 8.567433658045012e-05, "loss": 1.0931, "step": 17270 }, { "epoch": 1.45, "grad_norm": 0.8871063590049744, "learning_rate": 8.566593886462882e-05, "loss": 1.1194, "step": 17280 }, { "epoch": 1.45, "grad_norm": 0.8539029359817505, "learning_rate": 8.565754114880752e-05, "loss": 1.0869, "step": 17290 }, { "epoch": 1.45, "grad_norm": 0.8829309940338135, "learning_rate": 8.564914343298624e-05, "loss": 1.1106, "step": 17300 }, { "epoch": 1.45, "grad_norm": 0.938969612121582, "learning_rate": 8.564074571716494e-05, "loss": 1.1191, "step": 17310 }, { "epoch": 1.45, "grad_norm": 0.9321015477180481, "learning_rate": 8.563234800134364e-05, 
"loss": 1.1278, "step": 17320 }, { "epoch": 1.45, "grad_norm": 0.9374982714653015, "learning_rate": 8.562395028552235e-05, "loss": 1.0762, "step": 17330 }, { "epoch": 1.45, "grad_norm": 0.8815956711769104, "learning_rate": 8.561555256970104e-05, "loss": 1.0932, "step": 17340 }, { "epoch": 1.45, "grad_norm": 0.9213575720787048, "learning_rate": 8.560715485387974e-05, "loss": 1.0962, "step": 17350 }, { "epoch": 1.46, "grad_norm": 1.0526560544967651, "learning_rate": 8.559875713805846e-05, "loss": 1.0923, "step": 17360 }, { "epoch": 1.46, "grad_norm": 0.8528130650520325, "learning_rate": 8.559035942223716e-05, "loss": 1.1294, "step": 17370 }, { "epoch": 1.46, "grad_norm": 0.9504371285438538, "learning_rate": 8.558196170641586e-05, "loss": 1.1106, "step": 17380 }, { "epoch": 1.46, "grad_norm": 1.0213943719863892, "learning_rate": 8.557356399059456e-05, "loss": 1.1284, "step": 17390 }, { "epoch": 1.46, "grad_norm": 0.9347829818725586, "learning_rate": 8.556516627477327e-05, "loss": 1.0845, "step": 17400 }, { "epoch": 1.46, "grad_norm": 0.9889435768127441, "learning_rate": 8.555676855895197e-05, "loss": 1.1067, "step": 17410 }, { "epoch": 1.46, "grad_norm": 0.9114763140678406, "learning_rate": 8.554837084313067e-05, "loss": 1.0956, "step": 17420 }, { "epoch": 1.46, "grad_norm": 0.9614638090133667, "learning_rate": 8.553997312730937e-05, "loss": 1.0971, "step": 17430 }, { "epoch": 1.46, "grad_norm": 0.945780873298645, "learning_rate": 8.553157541148807e-05, "loss": 1.1041, "step": 17440 }, { "epoch": 1.46, "grad_norm": 0.9689162373542786, "learning_rate": 8.552317769566678e-05, "loss": 1.1195, "step": 17450 }, { "epoch": 1.46, "grad_norm": 1.2564977407455444, "learning_rate": 8.551477997984548e-05, "loss": 1.0891, "step": 17460 }, { "epoch": 1.46, "grad_norm": 0.9196128845214844, "learning_rate": 8.550638226402419e-05, "loss": 1.1039, "step": 17470 }, { "epoch": 1.47, "grad_norm": 0.8786943554878235, "learning_rate": 8.549798454820289e-05, "loss": 1.0919, "step": 17480 }, 
{ "epoch": 1.47, "grad_norm": 0.9473636746406555, "learning_rate": 8.548958683238159e-05, "loss": 1.095, "step": 17490 }, { "epoch": 1.47, "grad_norm": 0.948150098323822, "learning_rate": 8.54811891165603e-05, "loss": 1.1019, "step": 17500 }, { "epoch": 1.47, "grad_norm": 0.9033188819885254, "learning_rate": 8.547279140073901e-05, "loss": 1.0691, "step": 17510 }, { "epoch": 1.47, "grad_norm": 0.8785542249679565, "learning_rate": 8.546439368491771e-05, "loss": 1.1045, "step": 17520 }, { "epoch": 1.47, "grad_norm": 0.8864615559577942, "learning_rate": 8.545599596909641e-05, "loss": 1.1086, "step": 17530 }, { "epoch": 1.47, "grad_norm": 0.9623948931694031, "learning_rate": 8.544759825327512e-05, "loss": 1.0822, "step": 17540 }, { "epoch": 1.47, "grad_norm": 0.9355566501617432, "learning_rate": 8.543920053745381e-05, "loss": 1.0975, "step": 17550 }, { "epoch": 1.47, "grad_norm": 0.9936404228210449, "learning_rate": 8.543080282163251e-05, "loss": 1.0923, "step": 17560 }, { "epoch": 1.47, "grad_norm": 0.9029406905174255, "learning_rate": 8.542240510581122e-05, "loss": 1.1152, "step": 17570 }, { "epoch": 1.47, "grad_norm": 0.8796647191047668, "learning_rate": 8.541400738998993e-05, "loss": 1.0763, "step": 17580 }, { "epoch": 1.47, "grad_norm": 0.9394406676292419, "learning_rate": 8.540560967416863e-05, "loss": 1.1015, "step": 17590 }, { "epoch": 1.48, "grad_norm": 0.9513610005378723, "learning_rate": 8.539721195834734e-05, "loss": 1.0954, "step": 17600 }, { "epoch": 1.48, "grad_norm": 0.9663611650466919, "learning_rate": 8.538881424252604e-05, "loss": 1.1162, "step": 17610 }, { "epoch": 1.48, "grad_norm": 0.923028826713562, "learning_rate": 8.538041652670474e-05, "loss": 1.1077, "step": 17620 }, { "epoch": 1.48, "grad_norm": 0.9535107016563416, "learning_rate": 8.537201881088344e-05, "loss": 1.1055, "step": 17630 }, { "epoch": 1.48, "grad_norm": 0.9206113219261169, "learning_rate": 8.536362109506216e-05, "loss": 1.0738, "step": 17640 }, { "epoch": 1.48, "grad_norm": 
0.9430006742477417, "learning_rate": 8.535522337924084e-05, "loss": 1.1086, "step": 17650 }, { "epoch": 1.48, "grad_norm": 0.946961522102356, "learning_rate": 8.534682566341954e-05, "loss": 1.1203, "step": 17660 }, { "epoch": 1.48, "grad_norm": 0.9791433811187744, "learning_rate": 8.533842794759826e-05, "loss": 1.09, "step": 17670 }, { "epoch": 1.48, "grad_norm": 0.9791099429130554, "learning_rate": 8.533003023177696e-05, "loss": 1.0989, "step": 17680 }, { "epoch": 1.48, "grad_norm": 0.9486845135688782, "learning_rate": 8.532163251595566e-05, "loss": 1.0983, "step": 17690 }, { "epoch": 1.48, "grad_norm": 0.8685942888259888, "learning_rate": 8.531323480013436e-05, "loss": 1.1229, "step": 17700 }, { "epoch": 1.48, "grad_norm": 0.9693712592124939, "learning_rate": 8.530483708431307e-05, "loss": 1.1066, "step": 17710 }, { "epoch": 1.49, "grad_norm": 0.9077468514442444, "learning_rate": 8.529643936849178e-05, "loss": 1.1226, "step": 17720 }, { "epoch": 1.49, "grad_norm": 0.8657535910606384, "learning_rate": 8.528804165267048e-05, "loss": 1.0945, "step": 17730 }, { "epoch": 1.49, "grad_norm": 0.8869027495384216, "learning_rate": 8.527964393684919e-05, "loss": 1.0969, "step": 17740 }, { "epoch": 1.49, "grad_norm": 0.9077990651130676, "learning_rate": 8.527124622102789e-05, "loss": 1.1164, "step": 17750 }, { "epoch": 1.49, "grad_norm": 0.8592579364776611, "learning_rate": 8.526284850520658e-05, "loss": 1.1137, "step": 17760 }, { "epoch": 1.49, "grad_norm": 0.8895130753517151, "learning_rate": 8.525445078938529e-05, "loss": 1.0995, "step": 17770 }, { "epoch": 1.49, "grad_norm": 0.8667956590652466, "learning_rate": 8.5246053073564e-05, "loss": 1.1068, "step": 17780 }, { "epoch": 1.49, "grad_norm": 0.8754933476448059, "learning_rate": 8.52376553577427e-05, "loss": 1.107, "step": 17790 }, { "epoch": 1.49, "grad_norm": 0.9518774747848511, "learning_rate": 8.52292576419214e-05, "loss": 1.1096, "step": 17800 }, { "epoch": 1.49, "grad_norm": 0.9864497780799866, "learning_rate": 
8.522085992610011e-05, "loss": 1.0849, "step": 17810 }, { "epoch": 1.49, "grad_norm": 0.912307858467102, "learning_rate": 8.521246221027881e-05, "loss": 1.0872, "step": 17820 }, { "epoch": 1.49, "grad_norm": 0.9122206568717957, "learning_rate": 8.520406449445751e-05, "loss": 1.0971, "step": 17830 }, { "epoch": 1.5, "grad_norm": 0.8828033804893494, "learning_rate": 8.519566677863622e-05, "loss": 1.1096, "step": 17840 }, { "epoch": 1.5, "grad_norm": 0.9641870856285095, "learning_rate": 8.518726906281493e-05, "loss": 1.0712, "step": 17850 }, { "epoch": 1.5, "grad_norm": 0.9996892809867859, "learning_rate": 8.517887134699363e-05, "loss": 1.1109, "step": 17860 }, { "epoch": 1.5, "grad_norm": 0.8540690541267395, "learning_rate": 8.517047363117233e-05, "loss": 1.1083, "step": 17870 }, { "epoch": 1.5, "grad_norm": 0.9290221929550171, "learning_rate": 8.516207591535103e-05, "loss": 1.1033, "step": 17880 }, { "epoch": 1.5, "grad_norm": 0.9058725237846375, "learning_rate": 8.515367819952973e-05, "loss": 1.0865, "step": 17890 }, { "epoch": 1.5, "grad_norm": 0.9277447462081909, "learning_rate": 8.514528048370843e-05, "loss": 1.113, "step": 17900 }, { "epoch": 1.5, "grad_norm": 0.920447051525116, "learning_rate": 8.513688276788714e-05, "loss": 1.0831, "step": 17910 }, { "epoch": 1.5, "grad_norm": 0.9684462547302246, "learning_rate": 8.512848505206584e-05, "loss": 1.0917, "step": 17920 }, { "epoch": 1.5, "grad_norm": 0.9636054039001465, "learning_rate": 8.512008733624454e-05, "loss": 1.0996, "step": 17930 }, { "epoch": 1.5, "grad_norm": 0.8802708983421326, "learning_rate": 8.511168962042324e-05, "loss": 1.1095, "step": 17940 }, { "epoch": 1.5, "grad_norm": 0.9438508749008179, "learning_rate": 8.510329190460196e-05, "loss": 1.0972, "step": 17950 }, { "epoch": 1.51, "grad_norm": 0.9617946147918701, "learning_rate": 8.509489418878066e-05, "loss": 1.1107, "step": 17960 }, { "epoch": 1.51, "grad_norm": 0.9735690355300903, "learning_rate": 8.508649647295936e-05, "loss": 1.1005, "step": 
17970 }, { "epoch": 1.51, "grad_norm": 0.9920896291732788, "learning_rate": 8.507809875713806e-05, "loss": 1.0953, "step": 17980 }, { "epoch": 1.51, "grad_norm": 0.9597938656806946, "learning_rate": 8.506970104131676e-05, "loss": 1.1043, "step": 17990 }, { "epoch": 1.51, "grad_norm": 0.8762233853340149, "learning_rate": 8.506130332549546e-05, "loss": 1.103, "step": 18000 }, { "epoch": 1.51, "grad_norm": 0.9935160875320435, "learning_rate": 8.505290560967418e-05, "loss": 1.1178, "step": 18010 }, { "epoch": 1.51, "grad_norm": 0.9437522888183594, "learning_rate": 8.504450789385288e-05, "loss": 1.1281, "step": 18020 }, { "epoch": 1.51, "grad_norm": 0.9870259761810303, "learning_rate": 8.503611017803158e-05, "loss": 1.0936, "step": 18030 }, { "epoch": 1.51, "grad_norm": 1.0250309705734253, "learning_rate": 8.502771246221028e-05, "loss": 1.0999, "step": 18040 }, { "epoch": 1.51, "grad_norm": 0.9312120676040649, "learning_rate": 8.501931474638899e-05, "loss": 1.1039, "step": 18050 }, { "epoch": 1.51, "grad_norm": 0.9383872747421265, "learning_rate": 8.50109170305677e-05, "loss": 1.0702, "step": 18060 }, { "epoch": 1.51, "grad_norm": 1.0627061128616333, "learning_rate": 8.50025193147464e-05, "loss": 1.1119, "step": 18070 }, { "epoch": 1.52, "grad_norm": 0.9264964461326599, "learning_rate": 8.49941215989251e-05, "loss": 1.0883, "step": 18080 }, { "epoch": 1.52, "grad_norm": 0.9627074599266052, "learning_rate": 8.49857238831038e-05, "loss": 1.0718, "step": 18090 }, { "epoch": 1.52, "grad_norm": 1.029201626777649, "learning_rate": 8.49773261672825e-05, "loss": 1.0991, "step": 18100 }, { "epoch": 1.52, "grad_norm": 0.8738752007484436, "learning_rate": 8.496892845146121e-05, "loss": 1.0994, "step": 18110 }, { "epoch": 1.52, "grad_norm": 0.9824410080909729, "learning_rate": 8.496053073563991e-05, "loss": 1.1017, "step": 18120 }, { "epoch": 1.52, "grad_norm": 0.8930276036262512, "learning_rate": 8.495213301981861e-05, "loss": 1.095, "step": 18130 }, { "epoch": 1.52, "grad_norm": 
0.9773525595664978, "learning_rate": 8.494373530399731e-05, "loss": 1.0967, "step": 18140 }, { "epoch": 1.52, "grad_norm": 0.9366147518157959, "learning_rate": 8.493533758817603e-05, "loss": 1.0693, "step": 18150 }, { "epoch": 1.52, "grad_norm": 0.9455195665359497, "learning_rate": 8.492693987235473e-05, "loss": 1.1014, "step": 18160 }, { "epoch": 1.52, "grad_norm": 0.9755341410636902, "learning_rate": 8.491854215653343e-05, "loss": 1.0884, "step": 18170 }, { "epoch": 1.52, "grad_norm": 0.9536694884300232, "learning_rate": 8.491014444071213e-05, "loss": 1.0861, "step": 18180 }, { "epoch": 1.52, "grad_norm": 0.8827406764030457, "learning_rate": 8.490174672489083e-05, "loss": 1.0809, "step": 18190 }, { "epoch": 1.53, "grad_norm": 0.9611886143684387, "learning_rate": 8.489334900906953e-05, "loss": 1.0961, "step": 18200 }, { "epoch": 1.53, "grad_norm": 0.9199916124343872, "learning_rate": 8.488495129324824e-05, "loss": 1.1018, "step": 18210 }, { "epoch": 1.53, "grad_norm": 0.9742301106452942, "learning_rate": 8.487655357742695e-05, "loss": 1.1064, "step": 18220 }, { "epoch": 1.53, "grad_norm": 0.9249382615089417, "learning_rate": 8.486815586160565e-05, "loss": 1.1103, "step": 18230 }, { "epoch": 1.53, "grad_norm": 0.9248377680778503, "learning_rate": 8.485975814578435e-05, "loss": 1.0903, "step": 18240 }, { "epoch": 1.53, "grad_norm": 0.983910083770752, "learning_rate": 8.485136042996306e-05, "loss": 1.0985, "step": 18250 }, { "epoch": 1.53, "grad_norm": 0.9560139179229736, "learning_rate": 8.484296271414176e-05, "loss": 1.0994, "step": 18260 }, { "epoch": 1.53, "grad_norm": 0.892369270324707, "learning_rate": 8.483456499832046e-05, "loss": 1.0989, "step": 18270 }, { "epoch": 1.53, "grad_norm": 0.9658694267272949, "learning_rate": 8.482616728249916e-05, "loss": 1.0846, "step": 18280 }, { "epoch": 1.53, "grad_norm": 0.9675649404525757, "learning_rate": 8.481776956667786e-05, "loss": 1.0803, "step": 18290 }, { "epoch": 1.53, "grad_norm": 0.9331943392753601, 
"learning_rate": 8.480937185085656e-05, "loss": 1.1074, "step": 18300 }, { "epoch": 1.53, "grad_norm": 0.9198856949806213, "learning_rate": 8.480097413503527e-05, "loss": 1.0911, "step": 18310 }, { "epoch": 1.54, "grad_norm": 0.9248422980308533, "learning_rate": 8.479257641921398e-05, "loss": 1.1112, "step": 18320 }, { "epoch": 1.54, "grad_norm": 0.9331924319267273, "learning_rate": 8.478417870339268e-05, "loss": 1.093, "step": 18330 }, { "epoch": 1.54, "grad_norm": 0.924685001373291, "learning_rate": 8.477578098757138e-05, "loss": 1.0987, "step": 18340 }, { "epoch": 1.54, "grad_norm": 0.9171777963638306, "learning_rate": 8.47673832717501e-05, "loss": 1.0892, "step": 18350 }, { "epoch": 1.54, "grad_norm": 0.9498481750488281, "learning_rate": 8.47589855559288e-05, "loss": 1.0988, "step": 18360 }, { "epoch": 1.54, "grad_norm": 0.9432202577590942, "learning_rate": 8.47505878401075e-05, "loss": 1.099, "step": 18370 }, { "epoch": 1.54, "grad_norm": 0.9923070073127747, "learning_rate": 8.47421901242862e-05, "loss": 1.1257, "step": 18380 }, { "epoch": 1.54, "grad_norm": 0.9701034426689148, "learning_rate": 8.473379240846491e-05, "loss": 1.094, "step": 18390 }, { "epoch": 1.54, "grad_norm": 0.9445346593856812, "learning_rate": 8.47253946926436e-05, "loss": 1.1179, "step": 18400 }, { "epoch": 1.54, "grad_norm": 0.8754343390464783, "learning_rate": 8.47169969768223e-05, "loss": 1.1053, "step": 18410 }, { "epoch": 1.54, "grad_norm": 0.987666666507721, "learning_rate": 8.470859926100101e-05, "loss": 1.12, "step": 18420 }, { "epoch": 1.55, "grad_norm": 0.9782336354255676, "learning_rate": 8.470020154517971e-05, "loss": 1.0881, "step": 18430 }, { "epoch": 1.55, "grad_norm": 0.9279545545578003, "learning_rate": 8.469180382935841e-05, "loss": 1.0923, "step": 18440 }, { "epoch": 1.55, "grad_norm": 0.8741807341575623, "learning_rate": 8.468340611353713e-05, "loss": 1.1156, "step": 18450 }, { "epoch": 1.55, "grad_norm": 0.9295707941055298, "learning_rate": 8.467500839771583e-05, 
"loss": 1.1109, "step": 18460 }, { "epoch": 1.55, "grad_norm": 0.9178563356399536, "learning_rate": 8.466661068189453e-05, "loss": 1.0844, "step": 18470 }, { "epoch": 1.55, "grad_norm": 0.9312026500701904, "learning_rate": 8.465821296607323e-05, "loss": 1.0835, "step": 18480 }, { "epoch": 1.55, "grad_norm": 0.9479117393493652, "learning_rate": 8.464981525025195e-05, "loss": 1.1042, "step": 18490 }, { "epoch": 1.55, "grad_norm": 0.9105421304702759, "learning_rate": 8.464141753443063e-05, "loss": 1.0962, "step": 18500 }, { "epoch": 1.55, "grad_norm": 0.9587165117263794, "learning_rate": 8.463301981860933e-05, "loss": 1.111, "step": 18510 }, { "epoch": 1.55, "grad_norm": 0.9755902886390686, "learning_rate": 8.462462210278805e-05, "loss": 1.1001, "step": 18520 }, { "epoch": 1.55, "grad_norm": 0.9663732647895813, "learning_rate": 8.461622438696675e-05, "loss": 1.0885, "step": 18530 }, { "epoch": 1.55, "grad_norm": 1.0019534826278687, "learning_rate": 8.460782667114545e-05, "loss": 1.1046, "step": 18540 }, { "epoch": 1.56, "grad_norm": 0.9537042379379272, "learning_rate": 8.459942895532415e-05, "loss": 1.0861, "step": 18550 }, { "epoch": 1.56, "grad_norm": 0.9799646735191345, "learning_rate": 8.459103123950286e-05, "loss": 1.0812, "step": 18560 }, { "epoch": 1.56, "grad_norm": 0.8722805976867676, "learning_rate": 8.458263352368156e-05, "loss": 1.0961, "step": 18570 }, { "epoch": 1.56, "grad_norm": 0.9394450187683105, "learning_rate": 8.457423580786027e-05, "loss": 1.1177, "step": 18580 }, { "epoch": 1.56, "grad_norm": 0.8935893774032593, "learning_rate": 8.45666778636211e-05, "loss": 1.0953, "step": 18590 }, { "epoch": 1.56, "grad_norm": 0.930616021156311, "learning_rate": 8.45582801477998e-05, "loss": 1.1041, "step": 18600 }, { "epoch": 1.56, "grad_norm": 0.894257664680481, "learning_rate": 8.454988243197851e-05, "loss": 1.089, "step": 18610 }, { "epoch": 1.56, "grad_norm": 0.9630336165428162, "learning_rate": 8.45414847161572e-05, "loss": 1.0842, "step": 18620 }, { 
"epoch": 1.56, "grad_norm": 0.9433919787406921, "learning_rate": 8.45330870003359e-05, "loss": 1.0839, "step": 18630 }, { "epoch": 1.56, "grad_norm": 0.9394075274467468, "learning_rate": 8.452468928451462e-05, "loss": 1.0964, "step": 18640 }, { "epoch": 1.56, "grad_norm": 0.9664998650550842, "learning_rate": 8.451629156869332e-05, "loss": 1.1152, "step": 18650 }, { "epoch": 1.56, "grad_norm": 0.9555561542510986, "learning_rate": 8.450789385287202e-05, "loss": 1.093, "step": 18660 }, { "epoch": 1.57, "grad_norm": 0.9212177395820618, "learning_rate": 8.449949613705073e-05, "loss": 1.1, "step": 18670 }, { "epoch": 1.57, "grad_norm": 0.9876947402954102, "learning_rate": 8.449109842122943e-05, "loss": 1.1108, "step": 18680 }, { "epoch": 1.57, "grad_norm": 0.9156479239463806, "learning_rate": 8.448270070540813e-05, "loss": 1.0736, "step": 18690 }, { "epoch": 1.57, "grad_norm": 0.9072255492210388, "learning_rate": 8.447430298958683e-05, "loss": 1.1047, "step": 18700 }, { "epoch": 1.57, "grad_norm": 0.9976296424865723, "learning_rate": 8.446590527376555e-05, "loss": 1.0892, "step": 18710 }, { "epoch": 1.57, "grad_norm": 0.9257268905639648, "learning_rate": 8.445750755794425e-05, "loss": 1.1081, "step": 18720 }, { "epoch": 1.57, "grad_norm": 0.9828330874443054, "learning_rate": 8.444910984212294e-05, "loss": 1.0786, "step": 18730 }, { "epoch": 1.57, "grad_norm": 0.9410355091094971, "learning_rate": 8.444071212630165e-05, "loss": 1.0948, "step": 18740 }, { "epoch": 1.57, "grad_norm": 0.9980003833770752, "learning_rate": 8.443231441048035e-05, "loss": 1.0866, "step": 18750 }, { "epoch": 1.57, "grad_norm": 0.9311619400978088, "learning_rate": 8.442391669465905e-05, "loss": 1.0606, "step": 18760 }, { "epoch": 1.57, "grad_norm": 0.9579390287399292, "learning_rate": 8.441551897883777e-05, "loss": 1.1125, "step": 18770 }, { "epoch": 1.57, "grad_norm": 0.892838180065155, "learning_rate": 8.440712126301647e-05, "loss": 1.0958, "step": 18780 }, { "epoch": 1.58, "grad_norm": 
0.964271605014801, "learning_rate": 8.439872354719517e-05, "loss": 1.0817, "step": 18790 }, { "epoch": 1.58, "grad_norm": 0.9034227132797241, "learning_rate": 8.439032583137387e-05, "loss": 1.0884, "step": 18800 }, { "epoch": 1.58, "grad_norm": 0.9671628475189209, "learning_rate": 8.438192811555258e-05, "loss": 1.1003, "step": 18810 }, { "epoch": 1.58, "grad_norm": 0.9182506799697876, "learning_rate": 8.437353039973128e-05, "loss": 1.1334, "step": 18820 }, { "epoch": 1.58, "grad_norm": 0.9010951519012451, "learning_rate": 8.436513268390997e-05, "loss": 1.087, "step": 18830 }, { "epoch": 1.58, "grad_norm": 1.1023180484771729, "learning_rate": 8.435673496808868e-05, "loss": 1.0912, "step": 18840 }, { "epoch": 1.58, "grad_norm": 0.9568824768066406, "learning_rate": 8.434833725226739e-05, "loss": 1.071, "step": 18850 }, { "epoch": 1.58, "grad_norm": 0.9224495887756348, "learning_rate": 8.433993953644609e-05, "loss": 1.0681, "step": 18860 }, { "epoch": 1.58, "grad_norm": 0.9378868341445923, "learning_rate": 8.433154182062479e-05, "loss": 1.0908, "step": 18870 }, { "epoch": 1.58, "grad_norm": 0.9482252597808838, "learning_rate": 8.43231441048035e-05, "loss": 1.0853, "step": 18880 }, { "epoch": 1.58, "grad_norm": 0.9346802830696106, "learning_rate": 8.43147463889822e-05, "loss": 1.1016, "step": 18890 }, { "epoch": 1.58, "grad_norm": 0.9604766368865967, "learning_rate": 8.43063486731609e-05, "loss": 1.0791, "step": 18900 }, { "epoch": 1.59, "grad_norm": 0.9479242563247681, "learning_rate": 8.429795095733962e-05, "loss": 1.0748, "step": 18910 }, { "epoch": 1.59, "grad_norm": 0.9255791902542114, "learning_rate": 8.428955324151832e-05, "loss": 1.1082, "step": 18920 }, { "epoch": 1.59, "grad_norm": 0.9649567604064941, "learning_rate": 8.428115552569702e-05, "loss": 1.0851, "step": 18930 }, { "epoch": 1.59, "grad_norm": 0.9030554294586182, "learning_rate": 8.427275780987572e-05, "loss": 1.0974, "step": 18940 }, { "epoch": 1.59, "grad_norm": 0.9574862718582153, "learning_rate": 
8.426436009405442e-05, "loss": 1.0973, "step": 18950 }, { "epoch": 1.59, "grad_norm": 0.9443824887275696, "learning_rate": 8.425596237823312e-05, "loss": 1.0973, "step": 18960 }, { "epoch": 1.59, "grad_norm": 0.9072438478469849, "learning_rate": 8.424756466241182e-05, "loss": 1.094, "step": 18970 }, { "epoch": 1.59, "grad_norm": 1.0134698152542114, "learning_rate": 8.423916694659054e-05, "loss": 1.0964, "step": 18980 }, { "epoch": 1.59, "grad_norm": 0.9087139964103699, "learning_rate": 8.423076923076924e-05, "loss": 1.1001, "step": 18990 }, { "epoch": 1.59, "grad_norm": 0.9348397850990295, "learning_rate": 8.422237151494794e-05, "loss": 1.0988, "step": 19000 }, { "epoch": 1.59, "grad_norm": 0.9368646740913391, "learning_rate": 8.421397379912665e-05, "loss": 1.0877, "step": 19010 }, { "epoch": 1.59, "grad_norm": 0.9744086265563965, "learning_rate": 8.420557608330535e-05, "loss": 1.114, "step": 19020 }, { "epoch": 1.6, "grad_norm": 0.8904855847358704, "learning_rate": 8.419717836748405e-05, "loss": 1.1, "step": 19030 }, { "epoch": 1.6, "grad_norm": 0.9219388365745544, "learning_rate": 8.418878065166275e-05, "loss": 1.0974, "step": 19040 }, { "epoch": 1.6, "grad_norm": 0.9860958456993103, "learning_rate": 8.418038293584145e-05, "loss": 1.0859, "step": 19050 }, { "epoch": 1.6, "grad_norm": 0.9642945528030396, "learning_rate": 8.417198522002015e-05, "loss": 1.0886, "step": 19060 }, { "epoch": 1.6, "grad_norm": 0.9327720403671265, "learning_rate": 8.416358750419885e-05, "loss": 1.117, "step": 19070 }, { "epoch": 1.6, "grad_norm": 0.8988340497016907, "learning_rate": 8.415518978837757e-05, "loss": 1.1063, "step": 19080 }, { "epoch": 1.6, "grad_norm": 0.9338961243629456, "learning_rate": 8.414679207255627e-05, "loss": 1.0951, "step": 19090 }, { "epoch": 1.6, "grad_norm": 0.8888064026832581, "learning_rate": 8.413839435673497e-05, "loss": 1.0878, "step": 19100 }, { "epoch": 1.6, "grad_norm": 0.9350845813751221, "learning_rate": 8.412999664091367e-05, "loss": 1.0779, "step": 
19110 }, { "epoch": 1.6, "grad_norm": 0.9388766884803772, "learning_rate": 8.412159892509239e-05, "loss": 1.1079, "step": 19120 }, { "epoch": 1.6, "grad_norm": 0.9182081818580627, "learning_rate": 8.411320120927109e-05, "loss": 1.105, "step": 19130 }, { "epoch": 1.6, "grad_norm": 0.932131826877594, "learning_rate": 8.410480349344979e-05, "loss": 1.093, "step": 19140 }, { "epoch": 1.61, "grad_norm": 0.9025347232818604, "learning_rate": 8.409640577762849e-05, "loss": 1.0969, "step": 19150 }, { "epoch": 1.61, "grad_norm": 1.036621332168579, "learning_rate": 8.408800806180719e-05, "loss": 1.0982, "step": 19160 }, { "epoch": 1.61, "grad_norm": 0.9636215567588806, "learning_rate": 8.407961034598589e-05, "loss": 1.0775, "step": 19170 }, { "epoch": 1.61, "grad_norm": 0.9591709971427917, "learning_rate": 8.40712126301646e-05, "loss": 1.099, "step": 19180 }, { "epoch": 1.61, "grad_norm": 0.9226289391517639, "learning_rate": 8.40628149143433e-05, "loss": 1.0922, "step": 19190 }, { "epoch": 1.61, "grad_norm": 0.8729637265205383, "learning_rate": 8.4054417198522e-05, "loss": 1.0932, "step": 19200 }, { "epoch": 1.61, "grad_norm": 0.9399529695510864, "learning_rate": 8.40460194827007e-05, "loss": 1.0707, "step": 19210 }, { "epoch": 1.61, "grad_norm": 0.930468738079071, "learning_rate": 8.403762176687942e-05, "loss": 1.0735, "step": 19220 }, { "epoch": 1.61, "grad_norm": 1.01289701461792, "learning_rate": 8.402922405105812e-05, "loss": 1.0849, "step": 19230 }, { "epoch": 1.61, "grad_norm": 0.9548333883285522, "learning_rate": 8.402082633523682e-05, "loss": 1.1105, "step": 19240 }, { "epoch": 1.61, "grad_norm": 0.8973714113235474, "learning_rate": 8.401242861941552e-05, "loss": 1.1048, "step": 19250 }, { "epoch": 1.61, "grad_norm": 0.8910929560661316, "learning_rate": 8.400403090359422e-05, "loss": 1.0919, "step": 19260 }, { "epoch": 1.62, "grad_norm": 0.9398332834243774, "learning_rate": 8.399563318777292e-05, "loss": 1.1018, "step": 19270 }, { "epoch": 1.62, "grad_norm": 
0.9385080337524414, "learning_rate": 8.398723547195164e-05, "loss": 1.0744, "step": 19280 }, { "epoch": 1.62, "grad_norm": 0.9517788887023926, "learning_rate": 8.397883775613034e-05, "loss": 1.0762, "step": 19290 }, { "epoch": 1.62, "grad_norm": 1.0044435262680054, "learning_rate": 8.397044004030904e-05, "loss": 1.0799, "step": 19300 }, { "epoch": 1.62, "grad_norm": 0.9619836807250977, "learning_rate": 8.396204232448774e-05, "loss": 1.0788, "step": 19310 }, { "epoch": 1.62, "grad_norm": 0.8785536289215088, "learning_rate": 8.395364460866645e-05, "loss": 1.0957, "step": 19320 }, { "epoch": 1.62, "grad_norm": 0.9379634261131287, "learning_rate": 8.394524689284515e-05, "loss": 1.1004, "step": 19330 }, { "epoch": 1.62, "grad_norm": 0.9481569528579712, "learning_rate": 8.393684917702385e-05, "loss": 1.1068, "step": 19340 }, { "epoch": 1.62, "grad_norm": 0.8910374045372009, "learning_rate": 8.392845146120256e-05, "loss": 1.0787, "step": 19350 }, { "epoch": 1.62, "grad_norm": 0.9397358894348145, "learning_rate": 8.392005374538126e-05, "loss": 1.1065, "step": 19360 }, { "epoch": 1.62, "grad_norm": 0.9177324771881104, "learning_rate": 8.391165602955996e-05, "loss": 1.0805, "step": 19370 }, { "epoch": 1.62, "grad_norm": 0.9107533693313599, "learning_rate": 8.390325831373866e-05, "loss": 1.0888, "step": 19380 }, { "epoch": 1.63, "grad_norm": 0.9538021087646484, "learning_rate": 8.389486059791737e-05, "loss": 1.1125, "step": 19390 }, { "epoch": 1.63, "grad_norm": 0.9694135785102844, "learning_rate": 8.388646288209607e-05, "loss": 1.0996, "step": 19400 }, { "epoch": 1.63, "grad_norm": 0.9632307291030884, "learning_rate": 8.387806516627477e-05, "loss": 1.0881, "step": 19410 }, { "epoch": 1.63, "grad_norm": 0.9254166483879089, "learning_rate": 8.386966745045349e-05, "loss": 1.0994, "step": 19420 }, { "epoch": 1.63, "grad_norm": 0.9248160719871521, "learning_rate": 8.386126973463219e-05, "loss": 1.0838, "step": 19430 }, { "epoch": 1.63, "grad_norm": 0.9138569831848145, 
"learning_rate": 8.385287201881089e-05, "loss": 1.1108, "step": 19440 }, { "epoch": 1.63, "grad_norm": 0.8703060746192932, "learning_rate": 8.384447430298959e-05, "loss": 1.0866, "step": 19450 }, { "epoch": 1.63, "grad_norm": 0.9065632224082947, "learning_rate": 8.38360765871683e-05, "loss": 1.132, "step": 19460 }, { "epoch": 1.63, "grad_norm": 1.037733793258667, "learning_rate": 8.382767887134699e-05, "loss": 1.0943, "step": 19470 }, { "epoch": 1.63, "grad_norm": 0.9354545474052429, "learning_rate": 8.381928115552569e-05, "loss": 1.0801, "step": 19480 }, { "epoch": 1.63, "grad_norm": 1.0639814138412476, "learning_rate": 8.38108834397044e-05, "loss": 1.0851, "step": 19490 }, { "epoch": 1.63, "grad_norm": 0.9247221350669861, "learning_rate": 8.38024857238831e-05, "loss": 1.1058, "step": 19500 }, { "epoch": 1.64, "grad_norm": 0.8831647634506226, "learning_rate": 8.379408800806181e-05, "loss": 1.1169, "step": 19510 }, { "epoch": 1.64, "grad_norm": 1.0084609985351562, "learning_rate": 8.378569029224052e-05, "loss": 1.0865, "step": 19520 }, { "epoch": 1.64, "grad_norm": 0.9183603525161743, "learning_rate": 8.377729257641922e-05, "loss": 1.1007, "step": 19530 }, { "epoch": 1.64, "grad_norm": 0.9382530450820923, "learning_rate": 8.376889486059792e-05, "loss": 1.097, "step": 19540 }, { "epoch": 1.64, "grad_norm": 0.9827193021774292, "learning_rate": 8.376049714477662e-05, "loss": 1.0903, "step": 19550 }, { "epoch": 1.64, "grad_norm": 0.9732797741889954, "learning_rate": 8.375209942895534e-05, "loss": 1.1017, "step": 19560 }, { "epoch": 1.64, "grad_norm": 0.9216248393058777, "learning_rate": 8.374370171313402e-05, "loss": 1.0983, "step": 19570 }, { "epoch": 1.64, "grad_norm": 0.9452435374259949, "learning_rate": 8.373530399731273e-05, "loss": 1.0843, "step": 19580 }, { "epoch": 1.64, "grad_norm": 0.893520176410675, "learning_rate": 8.372690628149144e-05, "loss": 1.0868, "step": 19590 }, { "epoch": 1.64, "grad_norm": 0.952495276927948, "learning_rate": 8.371850856567014e-05, 
"loss": 1.1068, "step": 19600 }, { "epoch": 1.64, "grad_norm": 0.9048551917076111, "learning_rate": 8.371011084984884e-05, "loss": 1.1087, "step": 19610 }, { "epoch": 1.64, "grad_norm": 0.9745410084724426, "learning_rate": 8.370171313402754e-05, "loss": 1.0941, "step": 19620 }, { "epoch": 1.65, "grad_norm": 0.9055392742156982, "learning_rate": 8.369331541820626e-05, "loss": 1.0793, "step": 19630 }, { "epoch": 1.65, "grad_norm": 0.943188488483429, "learning_rate": 8.368491770238496e-05, "loss": 1.0741, "step": 19640 }, { "epoch": 1.65, "grad_norm": 0.9493341445922852, "learning_rate": 8.367651998656366e-05, "loss": 1.0611, "step": 19650 }, { "epoch": 1.65, "grad_norm": 0.9519808292388916, "learning_rate": 8.366812227074237e-05, "loss": 1.0812, "step": 19660 }, { "epoch": 1.65, "grad_norm": 0.895848274230957, "learning_rate": 8.365972455492107e-05, "loss": 1.1028, "step": 19670 }, { "epoch": 1.65, "grad_norm": 0.9439095854759216, "learning_rate": 8.365132683909976e-05, "loss": 1.1063, "step": 19680 }, { "epoch": 1.65, "grad_norm": 0.933009922504425, "learning_rate": 8.364292912327847e-05, "loss": 1.0913, "step": 19690 }, { "epoch": 1.65, "grad_norm": 0.9215378761291504, "learning_rate": 8.363453140745717e-05, "loss": 1.0949, "step": 19700 }, { "epoch": 1.65, "grad_norm": 0.9607491493225098, "learning_rate": 8.362613369163588e-05, "loss": 1.0928, "step": 19710 }, { "epoch": 1.65, "grad_norm": 0.9107180237770081, "learning_rate": 8.361773597581458e-05, "loss": 1.0811, "step": 19720 }, { "epoch": 1.65, "grad_norm": 0.9495106339454651, "learning_rate": 8.360933825999329e-05, "loss": 1.1017, "step": 19730 }, { "epoch": 1.65, "grad_norm": 0.9339221119880676, "learning_rate": 8.360094054417199e-05, "loss": 1.088, "step": 19740 }, { "epoch": 1.66, "grad_norm": 0.9448457360267639, "learning_rate": 8.359254282835069e-05, "loss": 1.0972, "step": 19750 }, { "epoch": 1.66, "grad_norm": 0.9283002018928528, "learning_rate": 8.35841451125294e-05, "loss": 1.1112, "step": 19760 }, { 
"epoch": 1.66, "grad_norm": 0.9877318143844604, "learning_rate": 8.35757473967081e-05, "loss": 1.0791, "step": 19770 }, { "epoch": 1.66, "grad_norm": 0.9149442911148071, "learning_rate": 8.35673496808868e-05, "loss": 1.0863, "step": 19780 }, { "epoch": 1.66, "grad_norm": 0.9150471091270447, "learning_rate": 8.355895196506551e-05, "loss": 1.0826, "step": 19790 }, { "epoch": 1.66, "grad_norm": 0.9501169323921204, "learning_rate": 8.355055424924421e-05, "loss": 1.0954, "step": 19800 }, { "epoch": 1.66, "grad_norm": 0.9282243847846985, "learning_rate": 8.354215653342291e-05, "loss": 1.0757, "step": 19810 }, { "epoch": 1.66, "grad_norm": 0.954955518245697, "learning_rate": 8.353375881760161e-05, "loss": 1.0805, "step": 19820 }, { "epoch": 1.66, "grad_norm": 0.9169294834136963, "learning_rate": 8.352536110178032e-05, "loss": 1.1015, "step": 19830 }, { "epoch": 1.66, "grad_norm": 0.9292359948158264, "learning_rate": 8.351696338595902e-05, "loss": 1.081, "step": 19840 }, { "epoch": 1.66, "grad_norm": 0.9779778718948364, "learning_rate": 8.350856567013773e-05, "loss": 1.0671, "step": 19850 }, { "epoch": 1.66, "grad_norm": 0.9182485342025757, "learning_rate": 8.350016795431643e-05, "loss": 1.1031, "step": 19860 }, { "epoch": 1.67, "grad_norm": 0.9590606689453125, "learning_rate": 8.349177023849514e-05, "loss": 1.1048, "step": 19870 }, { "epoch": 1.67, "grad_norm": 0.9228118062019348, "learning_rate": 8.348337252267384e-05, "loss": 1.0764, "step": 19880 }, { "epoch": 1.67, "grad_norm": 0.9537436366081238, "learning_rate": 8.347497480685253e-05, "loss": 1.1226, "step": 19890 }, { "epoch": 1.67, "grad_norm": 0.9905833601951599, "learning_rate": 8.346657709103124e-05, "loss": 1.0988, "step": 19900 }, { "epoch": 1.67, "grad_norm": 0.859458327293396, "learning_rate": 8.345817937520994e-05, "loss": 1.0828, "step": 19910 }, { "epoch": 1.67, "grad_norm": 0.9201727509498596, "learning_rate": 8.344978165938864e-05, "loss": 1.0924, "step": 19920 }, { "epoch": 1.67, "grad_norm": 
0.9572312831878662, "learning_rate": 8.344138394356736e-05, "loss": 1.0809, "step": 19930 }, { "epoch": 1.67, "grad_norm": 0.9225120544433594, "learning_rate": 8.343298622774606e-05, "loss": 1.0869, "step": 19940 }, { "epoch": 1.67, "grad_norm": 0.9653365612030029, "learning_rate": 8.342458851192476e-05, "loss": 1.0666, "step": 19950 }, { "epoch": 1.67, "grad_norm": 0.939113438129425, "learning_rate": 8.341619079610346e-05, "loss": 1.071, "step": 19960 }, { "epoch": 1.67, "grad_norm": 0.9120537638664246, "learning_rate": 8.340779308028217e-05, "loss": 1.125, "step": 19970 }, { "epoch": 1.67, "grad_norm": 0.9475664496421814, "learning_rate": 8.339939536446088e-05, "loss": 1.0907, "step": 19980 }, { "epoch": 1.68, "grad_norm": 0.9591881632804871, "learning_rate": 8.339099764863958e-05, "loss": 1.0754, "step": 19990 }, { "epoch": 1.68, "grad_norm": 1.0526905059814453, "learning_rate": 8.338259993281828e-05, "loss": 1.0928, "step": 20000 }, { "epoch": 1.68, "grad_norm": 0.9447547197341919, "learning_rate": 8.337420221699698e-05, "loss": 1.1016, "step": 20010 }, { "epoch": 1.68, "grad_norm": 0.9428248405456543, "learning_rate": 8.336580450117568e-05, "loss": 1.1052, "step": 20020 }, { "epoch": 1.68, "grad_norm": 0.9033694267272949, "learning_rate": 8.335740678535439e-05, "loss": 1.0798, "step": 20030 }, { "epoch": 1.68, "grad_norm": 1.012122631072998, "learning_rate": 8.334900906953309e-05, "loss": 1.0948, "step": 20040 }, { "epoch": 1.68, "grad_norm": 0.9211704134941101, "learning_rate": 8.33406113537118e-05, "loss": 1.0769, "step": 20050 }, { "epoch": 1.68, "grad_norm": 1.0090031623840332, "learning_rate": 8.33322136378905e-05, "loss": 1.0716, "step": 20060 }, { "epoch": 1.68, "grad_norm": 0.9996041655540466, "learning_rate": 8.332381592206921e-05, "loss": 1.1029, "step": 20070 }, { "epoch": 1.68, "grad_norm": 0.950567901134491, "learning_rate": 8.331541820624791e-05, "loss": 1.0849, "step": 20080 }, { "epoch": 1.68, "grad_norm": 0.9663973450660706, "learning_rate": 
8.330702049042661e-05, "loss": 1.0459, "step": 20090 }, { "epoch": 1.69, "grad_norm": 1.012169599533081, "learning_rate": 8.329862277460531e-05, "loss": 1.1009, "step": 20100 }, { "epoch": 1.69, "grad_norm": 0.9550958275794983, "learning_rate": 8.329022505878401e-05, "loss": 1.1168, "step": 20110 }, { "epoch": 1.69, "grad_norm": 0.9381619095802307, "learning_rate": 8.328182734296271e-05, "loss": 1.0956, "step": 20120 }, { "epoch": 1.69, "grad_norm": 0.9636103510856628, "learning_rate": 8.327342962714141e-05, "loss": 1.1065, "step": 20130 }, { "epoch": 1.69, "grad_norm": 0.9450669884681702, "learning_rate": 8.326503191132013e-05, "loss": 1.0884, "step": 20140 }, { "epoch": 1.69, "grad_norm": 0.915797233581543, "learning_rate": 8.325663419549883e-05, "loss": 1.0725, "step": 20150 }, { "epoch": 1.69, "grad_norm": 0.9706189036369324, "learning_rate": 8.324823647967753e-05, "loss": 1.0718, "step": 20160 }, { "epoch": 1.69, "grad_norm": 0.9453221559524536, "learning_rate": 8.323983876385624e-05, "loss": 1.0777, "step": 20170 }, { "epoch": 1.69, "grad_norm": 0.932264506816864, "learning_rate": 8.323144104803494e-05, "loss": 1.0708, "step": 20180 }, { "epoch": 1.69, "grad_norm": 0.9162026047706604, "learning_rate": 8.322304333221364e-05, "loss": 1.0987, "step": 20190 }, { "epoch": 1.69, "grad_norm": 0.9243186712265015, "learning_rate": 8.321464561639234e-05, "loss": 1.1059, "step": 20200 }, { "epoch": 1.69, "grad_norm": 0.9535598754882812, "learning_rate": 8.320624790057105e-05, "loss": 1.0899, "step": 20210 }, { "epoch": 1.7, "grad_norm": 0.974871039390564, "learning_rate": 8.319785018474975e-05, "loss": 1.1007, "step": 20220 }, { "epoch": 1.7, "grad_norm": 0.8855296969413757, "learning_rate": 8.318945246892845e-05, "loss": 1.0642, "step": 20230 }, { "epoch": 1.7, "grad_norm": 0.8772308230400085, "learning_rate": 8.318105475310716e-05, "loss": 1.109, "step": 20240 }, { "epoch": 1.7, "grad_norm": 0.9719114303588867, "learning_rate": 8.317265703728586e-05, "loss": 1.0832, 
"step": 20250 }, { "epoch": 1.7, "grad_norm": 0.9716581106185913, "learning_rate": 8.316425932146456e-05, "loss": 1.0882, "step": 20260 }, { "epoch": 1.7, "grad_norm": 0.9847034811973572, "learning_rate": 8.315586160564328e-05, "loss": 1.0608, "step": 20270 }, { "epoch": 1.7, "grad_norm": 0.9305758476257324, "learning_rate": 8.314746388982198e-05, "loss": 1.0842, "step": 20280 }, { "epoch": 1.7, "grad_norm": 0.9634222388267517, "learning_rate": 8.313906617400068e-05, "loss": 1.0948, "step": 20290 }, { "epoch": 1.7, "grad_norm": 0.9088085889816284, "learning_rate": 8.313066845817938e-05, "loss": 1.0944, "step": 20300 }, { "epoch": 1.7, "grad_norm": 0.9427503943443298, "learning_rate": 8.312227074235808e-05, "loss": 1.0859, "step": 20310 }, { "epoch": 1.7, "grad_norm": 0.9813087582588196, "learning_rate": 8.311387302653678e-05, "loss": 1.0772, "step": 20320 }, { "epoch": 1.7, "grad_norm": 1.0378156900405884, "learning_rate": 8.310547531071548e-05, "loss": 1.085, "step": 20330 }, { "epoch": 1.71, "grad_norm": 0.9516633749008179, "learning_rate": 8.30970775948942e-05, "loss": 1.0633, "step": 20340 }, { "epoch": 1.71, "grad_norm": 0.9724482297897339, "learning_rate": 8.30886798790729e-05, "loss": 1.0834, "step": 20350 }, { "epoch": 1.71, "grad_norm": 0.9871209859848022, "learning_rate": 8.30802821632516e-05, "loss": 1.0966, "step": 20360 }, { "epoch": 1.71, "grad_norm": 0.8819617629051208, "learning_rate": 8.307188444743031e-05, "loss": 1.1045, "step": 20370 }, { "epoch": 1.71, "grad_norm": 0.8651036024093628, "learning_rate": 8.306348673160901e-05, "loss": 1.0794, "step": 20380 }, { "epoch": 1.71, "grad_norm": 0.8954698443412781, "learning_rate": 8.305508901578771e-05, "loss": 1.0852, "step": 20390 }, { "epoch": 1.71, "grad_norm": 0.9244828224182129, "learning_rate": 8.304669129996641e-05, "loss": 1.1205, "step": 20400 }, { "epoch": 1.71, "grad_norm": 1.0268381834030151, "learning_rate": 8.303829358414513e-05, "loss": 1.0525, "step": 20410 }, { "epoch": 1.71, 
"grad_norm": 0.9340755939483643, "learning_rate": 8.302989586832381e-05, "loss": 1.0769, "step": 20420 }, { "epoch": 1.71, "grad_norm": 0.969909131526947, "learning_rate": 8.302149815250251e-05, "loss": 1.0801, "step": 20430 }, { "epoch": 1.71, "grad_norm": 0.9364521503448486, "learning_rate": 8.301310043668123e-05, "loss": 1.0712, "step": 20440 }, { "epoch": 1.71, "grad_norm": 0.9704520106315613, "learning_rate": 8.300470272085993e-05, "loss": 1.0939, "step": 20450 }, { "epoch": 1.72, "grad_norm": 0.9697971940040588, "learning_rate": 8.299630500503863e-05, "loss": 1.0604, "step": 20460 }, { "epoch": 1.72, "grad_norm": 0.9544804692268372, "learning_rate": 8.298790728921733e-05, "loss": 1.1033, "step": 20470 }, { "epoch": 1.72, "grad_norm": 0.962824821472168, "learning_rate": 8.297950957339605e-05, "loss": 1.0785, "step": 20480 }, { "epoch": 1.72, "grad_norm": 1.0860247611999512, "learning_rate": 8.297111185757475e-05, "loss": 1.0572, "step": 20490 }, { "epoch": 1.72, "grad_norm": 0.9392929077148438, "learning_rate": 8.296271414175345e-05, "loss": 1.0893, "step": 20500 }, { "epoch": 1.72, "grad_norm": 0.8851145505905151, "learning_rate": 8.295431642593216e-05, "loss": 1.0796, "step": 20510 }, { "epoch": 1.72, "grad_norm": 0.9846409559249878, "learning_rate": 8.294591871011086e-05, "loss": 1.0796, "step": 20520 }, { "epoch": 1.72, "grad_norm": 0.9698294401168823, "learning_rate": 8.293752099428955e-05, "loss": 1.0969, "step": 20530 }, { "epoch": 1.72, "grad_norm": 0.9646922945976257, "learning_rate": 8.292912327846826e-05, "loss": 1.0767, "step": 20540 }, { "epoch": 1.72, "grad_norm": 0.9086426496505737, "learning_rate": 8.292072556264696e-05, "loss": 1.0514, "step": 20550 }, { "epoch": 1.72, "grad_norm": 0.943329393863678, "learning_rate": 8.291232784682566e-05, "loss": 1.0919, "step": 20560 }, { "epoch": 1.72, "grad_norm": 0.9775128364562988, "learning_rate": 8.290393013100436e-05, "loss": 1.0855, "step": 20570 }, { "epoch": 1.73, "grad_norm": 0.9288136959075928, 
"learning_rate": 8.289553241518308e-05, "loss": 1.0845, "step": 20580 }, { "epoch": 1.73, "grad_norm": 0.9252897500991821, "learning_rate": 8.288713469936178e-05, "loss": 1.1043, "step": 20590 }, { "epoch": 1.73, "grad_norm": 0.9563115239143372, "learning_rate": 8.287873698354048e-05, "loss": 1.0949, "step": 20600 }, { "epoch": 1.73, "grad_norm": 0.9599754214286804, "learning_rate": 8.28703392677192e-05, "loss": 1.0817, "step": 20610 }, { "epoch": 1.73, "grad_norm": 0.9107076525688171, "learning_rate": 8.28619415518979e-05, "loss": 1.085, "step": 20620 }, { "epoch": 1.73, "grad_norm": 0.9191986322402954, "learning_rate": 8.285354383607658e-05, "loss": 1.0903, "step": 20630 }, { "epoch": 1.73, "grad_norm": 0.9460235834121704, "learning_rate": 8.28451461202553e-05, "loss": 1.0975, "step": 20640 }, { "epoch": 1.73, "grad_norm": 0.9365357756614685, "learning_rate": 8.2836748404434e-05, "loss": 1.0927, "step": 20650 }, { "epoch": 1.73, "grad_norm": 0.9596380591392517, "learning_rate": 8.28283506886127e-05, "loss": 1.0926, "step": 20660 }, { "epoch": 1.73, "grad_norm": 0.9388223886489868, "learning_rate": 8.28199529727914e-05, "loss": 1.0611, "step": 20670 }, { "epoch": 1.73, "grad_norm": 0.9124281406402588, "learning_rate": 8.281155525697011e-05, "loss": 1.0949, "step": 20680 }, { "epoch": 1.73, "grad_norm": 0.9100083708763123, "learning_rate": 8.280315754114881e-05, "loss": 1.0685, "step": 20690 }, { "epoch": 1.74, "grad_norm": 0.9451543092727661, "learning_rate": 8.279475982532751e-05, "loss": 1.0742, "step": 20700 }, { "epoch": 1.74, "grad_norm": 0.9619319438934326, "learning_rate": 8.278636210950622e-05, "loss": 1.0732, "step": 20710 }, { "epoch": 1.74, "grad_norm": 0.9725054502487183, "learning_rate": 8.277796439368493e-05, "loss": 1.074, "step": 20720 }, { "epoch": 1.74, "grad_norm": 1.0174696445465088, "learning_rate": 8.276956667786363e-05, "loss": 1.0804, "step": 20730 }, { "epoch": 1.74, "grad_norm": 0.9788162112236023, "learning_rate": 8.276116896204232e-05, 
"loss": 1.0705, "step": 20740 }, { "epoch": 1.74, "grad_norm": 0.9715824127197266, "learning_rate": 8.275277124622103e-05, "loss": 1.0692, "step": 20750 }, { "epoch": 1.74, "grad_norm": 0.943411111831665, "learning_rate": 8.274437353039973e-05, "loss": 1.102, "step": 20760 }, { "epoch": 1.74, "grad_norm": 0.929039716720581, "learning_rate": 8.273597581457843e-05, "loss": 1.0862, "step": 20770 }, { "epoch": 1.74, "grad_norm": 0.9214218258857727, "learning_rate": 8.272757809875715e-05, "loss": 1.0552, "step": 20780 }, { "epoch": 1.74, "grad_norm": 0.9461379051208496, "learning_rate": 8.271918038293585e-05, "loss": 1.0901, "step": 20790 }, { "epoch": 1.74, "grad_norm": 0.8952659964561462, "learning_rate": 8.271078266711455e-05, "loss": 1.1071, "step": 20800 }, { "epoch": 1.74, "grad_norm": 0.903317928314209, "learning_rate": 8.270238495129325e-05, "loss": 1.0806, "step": 20810 }, { "epoch": 1.75, "grad_norm": 0.941967248916626, "learning_rate": 8.269398723547196e-05, "loss": 1.0882, "step": 20820 }, { "epoch": 1.75, "grad_norm": 0.9013721346855164, "learning_rate": 8.268558951965066e-05, "loss": 1.0925, "step": 20830 }, { "epoch": 1.75, "grad_norm": 0.9886294007301331, "learning_rate": 8.267719180382935e-05, "loss": 1.1032, "step": 20840 }, { "epoch": 1.75, "grad_norm": 0.9935934543609619, "learning_rate": 8.266879408800807e-05, "loss": 1.112, "step": 20850 }, { "epoch": 1.75, "grad_norm": 0.9625208377838135, "learning_rate": 8.266039637218677e-05, "loss": 1.0961, "step": 20860 }, { "epoch": 1.75, "grad_norm": 0.9433298707008362, "learning_rate": 8.265199865636547e-05, "loss": 1.0905, "step": 20870 }, { "epoch": 1.75, "grad_norm": 0.9611908197402954, "learning_rate": 8.264360094054418e-05, "loss": 1.0865, "step": 20880 }, { "epoch": 1.75, "grad_norm": 0.9369805455207825, "learning_rate": 8.263520322472288e-05, "loss": 1.0922, "step": 20890 }, { "epoch": 1.75, "grad_norm": 0.9557572603225708, "learning_rate": 8.262680550890158e-05, "loss": 1.0972, "step": 20900 }, { 
"epoch": 1.75, "grad_norm": 0.9097500443458557, "learning_rate": 8.261840779308028e-05, "loss": 1.0754, "step": 20910 }, { "epoch": 1.75, "grad_norm": 0.9624474048614502, "learning_rate": 8.2610010077259e-05, "loss": 1.0619, "step": 20920 }, { "epoch": 1.75, "grad_norm": 0.9251635074615479, "learning_rate": 8.26016123614377e-05, "loss": 1.0965, "step": 20930 }, { "epoch": 1.76, "grad_norm": 0.9400815963745117, "learning_rate": 8.25932146456164e-05, "loss": 1.0839, "step": 20940 }, { "epoch": 1.76, "grad_norm": 0.9335811138153076, "learning_rate": 8.25848169297951e-05, "loss": 1.0913, "step": 20950 }, { "epoch": 1.76, "grad_norm": 1.0157513618469238, "learning_rate": 8.25764192139738e-05, "loss": 1.0885, "step": 20960 }, { "epoch": 1.76, "grad_norm": 0.9043487310409546, "learning_rate": 8.25680214981525e-05, "loss": 1.0927, "step": 20970 }, { "epoch": 1.76, "grad_norm": 0.8861150741577148, "learning_rate": 8.25596237823312e-05, "loss": 1.0791, "step": 20980 }, { "epoch": 1.76, "grad_norm": 0.9011774063110352, "learning_rate": 8.255122606650992e-05, "loss": 1.0486, "step": 20990 }, { "epoch": 1.76, "grad_norm": 0.9446386098861694, "learning_rate": 8.254282835068862e-05, "loss": 1.1008, "step": 21000 }, { "epoch": 1.76, "grad_norm": 0.8907005190849304, "learning_rate": 8.253443063486732e-05, "loss": 1.0767, "step": 21010 }, { "epoch": 1.76, "grad_norm": 1.0680102109909058, "learning_rate": 8.252603291904603e-05, "loss": 1.0766, "step": 21020 }, { "epoch": 1.76, "grad_norm": 0.9224295616149902, "learning_rate": 8.251763520322473e-05, "loss": 1.093, "step": 21030 }, { "epoch": 1.76, "grad_norm": 0.9853148460388184, "learning_rate": 8.250923748740343e-05, "loss": 1.0854, "step": 21040 }, { "epoch": 1.76, "grad_norm": 1.0135034322738647, "learning_rate": 8.250083977158213e-05, "loss": 1.06, "step": 21050 }, { "epoch": 1.77, "grad_norm": 0.8956427574157715, "learning_rate": 8.249244205576083e-05, "loss": 1.0805, "step": 21060 }, { "epoch": 1.77, "grad_norm": 
0.9784128069877625, "learning_rate": 8.248404433993953e-05, "loss": 1.0861, "step": 21070 }, { "epoch": 1.77, "grad_norm": 0.9585116505622864, "learning_rate": 8.247564662411824e-05, "loss": 1.0792, "step": 21080 }, { "epoch": 1.77, "grad_norm": 0.895172119140625, "learning_rate": 8.246724890829695e-05, "loss": 1.0902, "step": 21090 }, { "epoch": 1.77, "grad_norm": 0.9525881409645081, "learning_rate": 8.245885119247565e-05, "loss": 1.0565, "step": 21100 }, { "epoch": 1.77, "grad_norm": 0.8964269757270813, "learning_rate": 8.245045347665435e-05, "loss": 1.0909, "step": 21110 }, { "epoch": 1.77, "grad_norm": 0.9538741111755371, "learning_rate": 8.244205576083307e-05, "loss": 1.0751, "step": 21120 }, { "epoch": 1.77, "grad_norm": 1.0493577718734741, "learning_rate": 8.243365804501177e-05, "loss": 1.0944, "step": 21130 }, { "epoch": 1.77, "grad_norm": 0.932163417339325, "learning_rate": 8.242526032919047e-05, "loss": 1.0669, "step": 21140 }, { "epoch": 1.77, "grad_norm": 0.9787120819091797, "learning_rate": 8.241686261336917e-05, "loss": 1.0753, "step": 21150 }, { "epoch": 1.77, "grad_norm": 0.9854269027709961, "learning_rate": 8.240846489754787e-05, "loss": 1.0568, "step": 21160 }, { "epoch": 1.77, "grad_norm": 0.9161325693130493, "learning_rate": 8.240006718172657e-05, "loss": 1.0643, "step": 21170 }, { "epoch": 1.78, "grad_norm": 0.9661205410957336, "learning_rate": 8.239166946590527e-05, "loss": 1.0643, "step": 21180 }, { "epoch": 1.78, "grad_norm": 0.9399836659431458, "learning_rate": 8.238327175008398e-05, "loss": 1.0842, "step": 21190 }, { "epoch": 1.78, "grad_norm": 0.9120281338691711, "learning_rate": 8.237487403426268e-05, "loss": 1.0735, "step": 21200 }, { "epoch": 1.78, "grad_norm": 0.958123505115509, "learning_rate": 8.236647631844139e-05, "loss": 1.0996, "step": 21210 }, { "epoch": 1.78, "grad_norm": 0.9809736609458923, "learning_rate": 8.235807860262009e-05, "loss": 1.092, "step": 21220 }, { "epoch": 1.78, "grad_norm": 0.9468401670455933, 
"learning_rate": 8.23496808867988e-05, "loss": 1.0811, "step": 21230 }, { "epoch": 1.78, "grad_norm": 0.90665602684021, "learning_rate": 8.23412831709775e-05, "loss": 1.0662, "step": 21240 }, { "epoch": 1.78, "grad_norm": 1.008862018585205, "learning_rate": 8.23328854551562e-05, "loss": 1.0935, "step": 21250 }, { "epoch": 1.78, "grad_norm": 0.9973275661468506, "learning_rate": 8.232448773933492e-05, "loss": 1.0741, "step": 21260 }, { "epoch": 1.78, "grad_norm": 1.007988691329956, "learning_rate": 8.23160900235136e-05, "loss": 1.1038, "step": 21270 }, { "epoch": 1.78, "grad_norm": 0.9017188549041748, "learning_rate": 8.23076923076923e-05, "loss": 1.0901, "step": 21280 }, { "epoch": 1.78, "grad_norm": 0.9623133540153503, "learning_rate": 8.229929459187102e-05, "loss": 1.0838, "step": 21290 }, { "epoch": 1.79, "grad_norm": 0.9186885952949524, "learning_rate": 8.229089687604972e-05, "loss": 1.0923, "step": 21300 }, { "epoch": 1.79, "grad_norm": 0.9776137471199036, "learning_rate": 8.228249916022842e-05, "loss": 1.062, "step": 21310 }, { "epoch": 1.79, "grad_norm": 0.9220883846282959, "learning_rate": 8.227410144440712e-05, "loss": 1.0607, "step": 21320 }, { "epoch": 1.79, "grad_norm": 0.9247794151306152, "learning_rate": 8.226570372858583e-05, "loss": 1.0754, "step": 21330 }, { "epoch": 1.79, "grad_norm": 0.9729852676391602, "learning_rate": 8.225730601276453e-05, "loss": 1.1019, "step": 21340 }, { "epoch": 1.79, "grad_norm": 0.8800434470176697, "learning_rate": 8.224890829694324e-05, "loss": 1.0904, "step": 21350 }, { "epoch": 1.79, "grad_norm": 0.9299606084823608, "learning_rate": 8.224051058112195e-05, "loss": 1.0743, "step": 21360 }, { "epoch": 1.79, "grad_norm": 0.9961258769035339, "learning_rate": 8.223211286530064e-05, "loss": 1.1022, "step": 21370 }, { "epoch": 1.79, "grad_norm": 0.9743673205375671, "learning_rate": 8.222371514947934e-05, "loss": 1.0716, "step": 21380 }, { "epoch": 1.79, "grad_norm": 0.998546302318573, "learning_rate": 8.221531743365805e-05, 
"loss": 1.0825, "step": 21390 }, { "epoch": 1.79, "grad_norm": 0.9833109974861145, "learning_rate": 8.220691971783675e-05, "loss": 1.1103, "step": 21400 }, { "epoch": 1.79, "grad_norm": 0.9474690556526184, "learning_rate": 8.219852200201545e-05, "loss": 1.0768, "step": 21410 }, { "epoch": 1.8, "grad_norm": 0.9872686266899109, "learning_rate": 8.219012428619415e-05, "loss": 1.0924, "step": 21420 }, { "epoch": 1.8, "grad_norm": 1.017450213432312, "learning_rate": 8.218172657037287e-05, "loss": 1.0793, "step": 21430 }, { "epoch": 1.8, "grad_norm": 0.972040593624115, "learning_rate": 8.217332885455157e-05, "loss": 1.073, "step": 21440 }, { "epoch": 1.8, "grad_norm": 0.9796550869941711, "learning_rate": 8.216493113873027e-05, "loss": 1.1053, "step": 21450 }, { "epoch": 1.8, "grad_norm": 0.8813996315002441, "learning_rate": 8.215653342290897e-05, "loss": 1.077, "step": 21460 }, { "epoch": 1.8, "grad_norm": 0.8760569095611572, "learning_rate": 8.214813570708768e-05, "loss": 1.0903, "step": 21470 }, { "epoch": 1.8, "grad_norm": 1.0394665002822876, "learning_rate": 8.213973799126637e-05, "loss": 1.0911, "step": 21480 }, { "epoch": 1.8, "grad_norm": 0.933469295501709, "learning_rate": 8.213134027544507e-05, "loss": 1.0826, "step": 21490 }, { "epoch": 1.8, "grad_norm": 0.9994786381721497, "learning_rate": 8.212294255962379e-05, "loss": 1.0663, "step": 21500 }, { "epoch": 1.8, "grad_norm": 0.9118964672088623, "learning_rate": 8.211454484380249e-05, "loss": 1.1219, "step": 21510 }, { "epoch": 1.8, "grad_norm": 0.920733630657196, "learning_rate": 8.210614712798119e-05, "loss": 1.0899, "step": 21520 }, { "epoch": 1.8, "grad_norm": 0.9079091548919678, "learning_rate": 8.20977494121599e-05, "loss": 1.0845, "step": 21530 }, { "epoch": 1.81, "grad_norm": 0.9618085622787476, "learning_rate": 8.20893516963386e-05, "loss": 1.085, "step": 21540 }, { "epoch": 1.81, "grad_norm": 0.9206164479255676, "learning_rate": 8.20809539805173e-05, "loss": 1.0712, "step": 21550 }, { "epoch": 1.81, 
"grad_norm": 0.9424861073493958, "learning_rate": 8.2072556264696e-05, "loss": 1.0852, "step": 21560 }, { "epoch": 1.81, "grad_norm": 0.9359887838363647, "learning_rate": 8.206415854887472e-05, "loss": 1.0682, "step": 21570 }, { "epoch": 1.81, "grad_norm": 0.9242563247680664, "learning_rate": 8.205576083305342e-05, "loss": 1.0746, "step": 21580 }, { "epoch": 1.81, "grad_norm": 1.0069301128387451, "learning_rate": 8.20473631172321e-05, "loss": 1.1143, "step": 21590 }, { "epoch": 1.81, "grad_norm": 0.9253529906272888, "learning_rate": 8.203896540141082e-05, "loss": 1.0594, "step": 21600 }, { "epoch": 1.81, "grad_norm": 0.9807161688804626, "learning_rate": 8.203056768558952e-05, "loss": 1.0656, "step": 21610 }, { "epoch": 1.81, "grad_norm": 1.0018244981765747, "learning_rate": 8.202216996976822e-05, "loss": 1.1132, "step": 21620 }, { "epoch": 1.81, "grad_norm": 0.9353353977203369, "learning_rate": 8.201377225394694e-05, "loss": 1.0787, "step": 21630 }, { "epoch": 1.81, "grad_norm": 0.8812234997749329, "learning_rate": 8.200537453812564e-05, "loss": 1.0941, "step": 21640 }, { "epoch": 1.81, "grad_norm": 0.923886239528656, "learning_rate": 8.199697682230434e-05, "loss": 1.0931, "step": 21650 }, { "epoch": 1.82, "grad_norm": 0.9797891974449158, "learning_rate": 8.198857910648304e-05, "loss": 1.0864, "step": 21660 }, { "epoch": 1.82, "grad_norm": 0.9551218152046204, "learning_rate": 8.198018139066175e-05, "loss": 1.0962, "step": 21670 }, { "epoch": 1.82, "grad_norm": 0.9550528526306152, "learning_rate": 8.197178367484045e-05, "loss": 1.0974, "step": 21680 }, { "epoch": 1.82, "grad_norm": 0.9366137385368347, "learning_rate": 8.196338595901914e-05, "loss": 1.1015, "step": 21690 }, { "epoch": 1.82, "grad_norm": 1.0413545370101929, "learning_rate": 8.195498824319785e-05, "loss": 1.0791, "step": 21700 }, { "epoch": 1.82, "grad_norm": 0.9629783630371094, "learning_rate": 8.194659052737656e-05, "loss": 1.0885, "step": 21710 }, { "epoch": 1.82, "grad_norm": 0.9376969337463379, 
"learning_rate": 8.193819281155526e-05, "loss": 1.0595, "step": 21720 }, { "epoch": 1.82, "grad_norm": 0.922089159488678, "learning_rate": 8.192979509573396e-05, "loss": 1.0483, "step": 21730 }, { "epoch": 1.82, "grad_norm": 0.9900693893432617, "learning_rate": 8.192139737991267e-05, "loss": 1.0981, "step": 21740 }, { "epoch": 1.82, "grad_norm": 0.9913142919540405, "learning_rate": 8.191299966409137e-05, "loss": 1.0775, "step": 21750 }, { "epoch": 1.82, "grad_norm": 1.0081490278244019, "learning_rate": 8.190460194827007e-05, "loss": 1.0845, "step": 21760 }, { "epoch": 1.83, "grad_norm": 0.8816586136817932, "learning_rate": 8.189620423244879e-05, "loss": 1.0928, "step": 21770 }, { "epoch": 1.83, "grad_norm": 0.9351156949996948, "learning_rate": 8.188780651662749e-05, "loss": 1.0962, "step": 21780 }, { "epoch": 1.83, "grad_norm": 0.9366614818572998, "learning_rate": 8.187940880080619e-05, "loss": 1.0729, "step": 21790 }, { "epoch": 1.83, "grad_norm": 0.930763304233551, "learning_rate": 8.187101108498489e-05, "loss": 1.0951, "step": 21800 }, { "epoch": 1.83, "grad_norm": 0.9486774802207947, "learning_rate": 8.186261336916359e-05, "loss": 1.0833, "step": 21810 }, { "epoch": 1.83, "grad_norm": 0.9392075538635254, "learning_rate": 8.185421565334229e-05, "loss": 1.0957, "step": 21820 }, { "epoch": 1.83, "grad_norm": 0.9175149202346802, "learning_rate": 8.184581793752099e-05, "loss": 1.0716, "step": 21830 }, { "epoch": 1.83, "grad_norm": 0.9535057544708252, "learning_rate": 8.18374202216997e-05, "loss": 1.092, "step": 21840 }, { "epoch": 1.83, "grad_norm": 0.9135550856590271, "learning_rate": 8.18290225058784e-05, "loss": 1.0677, "step": 21850 }, { "epoch": 1.83, "grad_norm": 0.9349638223648071, "learning_rate": 8.18206247900571e-05, "loss": 1.0694, "step": 21860 }, { "epoch": 1.83, "grad_norm": 0.9543958902359009, "learning_rate": 8.181222707423582e-05, "loss": 1.0898, "step": 21870 }, { "epoch": 1.83, "grad_norm": 0.9582996964454651, "learning_rate": 
8.180382935841452e-05, "loss": 1.0672, "step": 21880 }, { "epoch": 1.84, "grad_norm": 0.9028475880622864, "learning_rate": 8.179543164259322e-05, "loss": 1.0957, "step": 21890 }, { "epoch": 1.84, "grad_norm": 0.9782268404960632, "learning_rate": 8.178703392677192e-05, "loss": 1.0925, "step": 21900 }, { "epoch": 1.84, "grad_norm": 0.9743828773498535, "learning_rate": 8.177863621095062e-05, "loss": 1.0715, "step": 21910 }, { "epoch": 1.84, "grad_norm": 0.8652592897415161, "learning_rate": 8.177023849512932e-05, "loss": 1.0864, "step": 21920 }, { "epoch": 1.84, "grad_norm": 0.9322866201400757, "learning_rate": 8.176184077930802e-05, "loss": 1.0908, "step": 21930 }, { "epoch": 1.84, "grad_norm": 0.929085910320282, "learning_rate": 8.175344306348674e-05, "loss": 1.0787, "step": 21940 }, { "epoch": 1.84, "grad_norm": 0.9540700912475586, "learning_rate": 8.174504534766544e-05, "loss": 1.0783, "step": 21950 }, { "epoch": 1.84, "grad_norm": 1.0052622556686401, "learning_rate": 8.173664763184414e-05, "loss": 1.0562, "step": 21960 }, { "epoch": 1.84, "grad_norm": 0.9070284366607666, "learning_rate": 8.172824991602284e-05, "loss": 1.0682, "step": 21970 }, { "epoch": 1.84, "grad_norm": 0.9628702402114868, "learning_rate": 8.171985220020156e-05, "loss": 1.0817, "step": 21980 }, { "epoch": 1.84, "grad_norm": 0.9631544947624207, "learning_rate": 8.171145448438026e-05, "loss": 1.0646, "step": 21990 }, { "epoch": 1.84, "grad_norm": 0.9579653739929199, "learning_rate": 8.170305676855896e-05, "loss": 1.0801, "step": 22000 }, { "epoch": 1.85, "grad_norm": 0.9916050434112549, "learning_rate": 8.169465905273766e-05, "loss": 1.0699, "step": 22010 }, { "epoch": 1.85, "grad_norm": 0.8972102403640747, "learning_rate": 8.168626133691636e-05, "loss": 1.0924, "step": 22020 }, { "epoch": 1.85, "grad_norm": 0.9655322432518005, "learning_rate": 8.167786362109506e-05, "loss": 1.0855, "step": 22030 }, { "epoch": 1.85, "grad_norm": 0.961615264415741, "learning_rate": 8.166946590527377e-05, "loss": 
1.0665, "step": 22040 }, { "epoch": 1.85, "grad_norm": 0.9401275515556335, "learning_rate": 8.166106818945247e-05, "loss": 1.0871, "step": 22050 }, { "epoch": 1.85, "grad_norm": 0.9355718493461609, "learning_rate": 8.165267047363117e-05, "loss": 1.0628, "step": 22060 }, { "epoch": 1.85, "grad_norm": 0.9333938956260681, "learning_rate": 8.164427275780987e-05, "loss": 1.0686, "step": 22070 }, { "epoch": 1.85, "grad_norm": 0.9262435436248779, "learning_rate": 8.163587504198859e-05, "loss": 1.0774, "step": 22080 }, { "epoch": 1.85, "grad_norm": 0.881001889705658, "learning_rate": 8.162747732616729e-05, "loss": 1.0975, "step": 22090 }, { "epoch": 1.85, "grad_norm": 0.9583486318588257, "learning_rate": 8.161907961034599e-05, "loss": 1.0693, "step": 22100 }, { "epoch": 1.85, "grad_norm": 0.9099860191345215, "learning_rate": 8.16106818945247e-05, "loss": 1.0781, "step": 22110 }, { "epoch": 1.85, "grad_norm": 0.9885082244873047, "learning_rate": 8.160228417870339e-05, "loss": 1.081, "step": 22120 }, { "epoch": 1.86, "grad_norm": 0.9352201223373413, "learning_rate": 8.159388646288209e-05, "loss": 1.0734, "step": 22130 }, { "epoch": 1.86, "grad_norm": 0.9802115559577942, "learning_rate": 8.158548874706081e-05, "loss": 1.0791, "step": 22140 }, { "epoch": 1.86, "grad_norm": 0.951797366142273, "learning_rate": 8.157709103123951e-05, "loss": 1.0886, "step": 22150 }, { "epoch": 1.86, "grad_norm": 0.8874569535255432, "learning_rate": 8.156869331541821e-05, "loss": 1.09, "step": 22160 }, { "epoch": 1.86, "grad_norm": 1.0224614143371582, "learning_rate": 8.156029559959691e-05, "loss": 1.0689, "step": 22170 }, { "epoch": 1.86, "grad_norm": 0.9177655577659607, "learning_rate": 8.155189788377562e-05, "loss": 1.0579, "step": 22180 }, { "epoch": 1.86, "grad_norm": 0.9853928089141846, "learning_rate": 8.154350016795432e-05, "loss": 1.0966, "step": 22190 }, { "epoch": 1.86, "grad_norm": 0.974591076374054, "learning_rate": 8.153510245213302e-05, "loss": 1.0844, "step": 22200 }, { "epoch": 
1.86, "grad_norm": 0.9281429052352905, "learning_rate": 8.152670473631174e-05, "loss": 1.0812, "step": 22210 }, { "epoch": 1.86, "grad_norm": 1.0357728004455566, "learning_rate": 8.151830702049043e-05, "loss": 1.1061, "step": 22220 }, { "epoch": 1.86, "grad_norm": 0.9517058730125427, "learning_rate": 8.150990930466913e-05, "loss": 1.0812, "step": 22230 }, { "epoch": 1.86, "grad_norm": 0.9043789505958557, "learning_rate": 8.150151158884784e-05, "loss": 1.0764, "step": 22240 }, { "epoch": 1.87, "grad_norm": 0.9185697436332703, "learning_rate": 8.149311387302654e-05, "loss": 1.0845, "step": 22250 }, { "epoch": 1.87, "grad_norm": 0.9180626273155212, "learning_rate": 8.148471615720524e-05, "loss": 1.0975, "step": 22260 }, { "epoch": 1.87, "grad_norm": 0.929567813873291, "learning_rate": 8.147631844138394e-05, "loss": 1.0699, "step": 22270 }, { "epoch": 1.87, "grad_norm": 0.914916455745697, "learning_rate": 8.146792072556266e-05, "loss": 1.0637, "step": 22280 }, { "epoch": 1.87, "grad_norm": 1.0016752481460571, "learning_rate": 8.145952300974136e-05, "loss": 1.0874, "step": 22290 }, { "epoch": 1.87, "grad_norm": 0.9593884944915771, "learning_rate": 8.145112529392006e-05, "loss": 1.0892, "step": 22300 }, { "epoch": 1.87, "grad_norm": 0.9632132053375244, "learning_rate": 8.144272757809876e-05, "loss": 1.0751, "step": 22310 }, { "epoch": 1.87, "grad_norm": 0.9335569739341736, "learning_rate": 8.143432986227747e-05, "loss": 1.0691, "step": 22320 }, { "epoch": 1.87, "grad_norm": 0.9385718703269958, "learning_rate": 8.142593214645616e-05, "loss": 1.0819, "step": 22330 }, { "epoch": 1.87, "grad_norm": 0.9883543848991394, "learning_rate": 8.141753443063486e-05, "loss": 1.1087, "step": 22340 }, { "epoch": 1.87, "grad_norm": 0.9951726198196411, "learning_rate": 8.140913671481358e-05, "loss": 1.0783, "step": 22350 }, { "epoch": 1.87, "grad_norm": 0.9235413074493408, "learning_rate": 8.140073899899228e-05, "loss": 1.0514, "step": 22360 }, { "epoch": 1.88, "grad_norm": 
0.9246856570243835, "learning_rate": 8.139234128317098e-05, "loss": 1.0962, "step": 22370 }, { "epoch": 1.88, "grad_norm": 0.9286307692527771, "learning_rate": 8.138394356734969e-05, "loss": 1.0609, "step": 22380 }, { "epoch": 1.88, "grad_norm": 0.98885178565979, "learning_rate": 8.137554585152839e-05, "loss": 1.0703, "step": 22390 }, { "epoch": 1.88, "grad_norm": 0.9366748332977295, "learning_rate": 8.136714813570709e-05, "loss": 1.062, "step": 22400 }, { "epoch": 1.88, "grad_norm": 0.8993573784828186, "learning_rate": 8.135875041988579e-05, "loss": 1.0775, "step": 22410 }, { "epoch": 1.88, "grad_norm": 0.9327789545059204, "learning_rate": 8.135035270406451e-05, "loss": 1.0756, "step": 22420 }, { "epoch": 1.88, "grad_norm": 0.9567099809646606, "learning_rate": 8.13419549882432e-05, "loss": 1.0785, "step": 22430 }, { "epoch": 1.88, "grad_norm": 0.86855149269104, "learning_rate": 8.13335572724219e-05, "loss": 1.0636, "step": 22440 }, { "epoch": 1.88, "grad_norm": 0.9225255250930786, "learning_rate": 8.132515955660061e-05, "loss": 1.0647, "step": 22450 }, { "epoch": 1.88, "grad_norm": 0.9583046436309814, "learning_rate": 8.131676184077931e-05, "loss": 1.0691, "step": 22460 }, { "epoch": 1.88, "grad_norm": 0.9480694532394409, "learning_rate": 8.130836412495801e-05, "loss": 1.0844, "step": 22470 }, { "epoch": 1.88, "grad_norm": 0.9560478329658508, "learning_rate": 8.129996640913673e-05, "loss": 1.0658, "step": 22480 }, { "epoch": 1.89, "grad_norm": 0.9470974206924438, "learning_rate": 8.129156869331543e-05, "loss": 1.0805, "step": 22490 }, { "epoch": 1.89, "grad_norm": 0.9007673263549805, "learning_rate": 8.128317097749413e-05, "loss": 1.0868, "step": 22500 }, { "epoch": 1.89, "grad_norm": 0.9324938654899597, "learning_rate": 8.127477326167283e-05, "loss": 1.0575, "step": 22510 }, { "epoch": 1.89, "grad_norm": 0.9622287750244141, "learning_rate": 8.126637554585154e-05, "loss": 1.0618, "step": 22520 }, { "epoch": 1.89, "grad_norm": 0.8709717988967896, "learning_rate": 
8.125797783003024e-05, "loss": 1.0648, "step": 22530 }, { "epoch": 1.89, "grad_norm": 0.9371627569198608, "learning_rate": 8.124958011420893e-05, "loss": 1.0953, "step": 22540 }, { "epoch": 1.89, "grad_norm": 0.948164701461792, "learning_rate": 8.124118239838764e-05, "loss": 1.0774, "step": 22550 }, { "epoch": 1.89, "grad_norm": 0.9619824290275574, "learning_rate": 8.123278468256634e-05, "loss": 1.0783, "step": 22560 }, { "epoch": 1.89, "grad_norm": 0.9640927910804749, "learning_rate": 8.122438696674504e-05, "loss": 1.0637, "step": 22570 }, { "epoch": 1.89, "grad_norm": 0.8726696968078613, "learning_rate": 8.121598925092375e-05, "loss": 1.0622, "step": 22580 }, { "epoch": 1.89, "grad_norm": 0.9290249943733215, "learning_rate": 8.120759153510246e-05, "loss": 1.0841, "step": 22590 }, { "epoch": 1.89, "grad_norm": 0.9070258736610413, "learning_rate": 8.119919381928116e-05, "loss": 1.0719, "step": 22600 }, { "epoch": 1.9, "grad_norm": 0.9861231446266174, "learning_rate": 8.119079610345986e-05, "loss": 1.0827, "step": 22610 }, { "epoch": 1.9, "grad_norm": 0.9475063681602478, "learning_rate": 8.118239838763858e-05, "loss": 1.0894, "step": 22620 }, { "epoch": 1.9, "grad_norm": 0.9697584509849548, "learning_rate": 8.117400067181728e-05, "loss": 1.0965, "step": 22630 }, { "epoch": 1.9, "grad_norm": 0.9208489060401917, "learning_rate": 8.116560295599598e-05, "loss": 1.0746, "step": 22640 }, { "epoch": 1.9, "grad_norm": 0.9453262686729431, "learning_rate": 8.115720524017468e-05, "loss": 1.1015, "step": 22650 }, { "epoch": 1.9, "grad_norm": 0.9898834824562073, "learning_rate": 8.114880752435338e-05, "loss": 1.0798, "step": 22660 }, { "epoch": 1.9, "grad_norm": 0.9766094088554382, "learning_rate": 8.114040980853208e-05, "loss": 1.062, "step": 22670 }, { "epoch": 1.9, "grad_norm": 0.9808549880981445, "learning_rate": 8.113201209271078e-05, "loss": 1.1038, "step": 22680 }, { "epoch": 1.9, "grad_norm": 0.8814826607704163, "learning_rate": 8.11236143768895e-05, "loss": 1.0757, 
"step": 22690 }, { "epoch": 1.9, "grad_norm": 0.9363024830818176, "learning_rate": 8.11152166610682e-05, "loss": 1.0753, "step": 22700 }, { "epoch": 1.9, "grad_norm": 0.8872799277305603, "learning_rate": 8.11068189452469e-05, "loss": 1.0931, "step": 22710 }, { "epoch": 1.9, "grad_norm": 0.9672569036483765, "learning_rate": 8.109842122942561e-05, "loss": 1.0803, "step": 22720 }, { "epoch": 1.91, "grad_norm": 0.9465705156326294, "learning_rate": 8.109002351360431e-05, "loss": 1.0726, "step": 22730 }, { "epoch": 1.91, "grad_norm": 0.9397774934768677, "learning_rate": 8.108162579778301e-05, "loss": 1.0746, "step": 22740 }, { "epoch": 1.91, "grad_norm": 0.9704985022544861, "learning_rate": 8.107322808196171e-05, "loss": 1.0571, "step": 22750 }, { "epoch": 1.91, "grad_norm": 0.961126446723938, "learning_rate": 8.106483036614041e-05, "loss": 1.0824, "step": 22760 }, { "epoch": 1.91, "grad_norm": 0.9272827506065369, "learning_rate": 8.105643265031911e-05, "loss": 1.0959, "step": 22770 }, { "epoch": 1.91, "grad_norm": 0.9782728552818298, "learning_rate": 8.104803493449781e-05, "loss": 1.0653, "step": 22780 }, { "epoch": 1.91, "grad_norm": 0.9585440158843994, "learning_rate": 8.103963721867653e-05, "loss": 1.0561, "step": 22790 }, { "epoch": 1.91, "grad_norm": 0.9587463736534119, "learning_rate": 8.103123950285523e-05, "loss": 1.0614, "step": 22800 }, { "epoch": 1.91, "grad_norm": 1.0167219638824463, "learning_rate": 8.102284178703393e-05, "loss": 1.079, "step": 22810 }, { "epoch": 1.91, "grad_norm": 0.9722715616226196, "learning_rate": 8.101444407121263e-05, "loss": 1.0879, "step": 22820 }, { "epoch": 1.91, "grad_norm": 1.0199732780456543, "learning_rate": 8.100604635539134e-05, "loss": 1.0716, "step": 22830 }, { "epoch": 1.91, "grad_norm": 0.9810200929641724, "learning_rate": 8.099764863957004e-05, "loss": 1.087, "step": 22840 }, { "epoch": 1.92, "grad_norm": 0.9538648128509521, "learning_rate": 8.098925092374875e-05, "loss": 1.0954, "step": 22850 }, { "epoch": 1.92, 
"grad_norm": 0.952927827835083, "learning_rate": 8.098085320792745e-05, "loss": 1.0757, "step": 22860 }, { "epoch": 1.92, "grad_norm": 0.9318287372589111, "learning_rate": 8.097245549210615e-05, "loss": 1.0864, "step": 22870 }, { "epoch": 1.92, "grad_norm": 0.94853675365448, "learning_rate": 8.096405777628485e-05, "loss": 1.0744, "step": 22880 }, { "epoch": 1.92, "grad_norm": 0.9416312575340271, "learning_rate": 8.095566006046356e-05, "loss": 1.0826, "step": 22890 }, { "epoch": 1.92, "grad_norm": 0.9539600014686584, "learning_rate": 8.094726234464226e-05, "loss": 1.0803, "step": 22900 }, { "epoch": 1.92, "grad_norm": 0.9516565203666687, "learning_rate": 8.093886462882096e-05, "loss": 1.0729, "step": 22910 }, { "epoch": 1.92, "grad_norm": 0.9808760285377502, "learning_rate": 8.093046691299966e-05, "loss": 1.0778, "step": 22920 }, { "epoch": 1.92, "grad_norm": 0.9635851979255676, "learning_rate": 8.092206919717838e-05, "loss": 1.1096, "step": 22930 }, { "epoch": 1.92, "grad_norm": 0.9341956973075867, "learning_rate": 8.091367148135708e-05, "loss": 1.0787, "step": 22940 }, { "epoch": 1.92, "grad_norm": 0.9545960426330566, "learning_rate": 8.090527376553578e-05, "loss": 1.106, "step": 22950 }, { "epoch": 1.92, "grad_norm": 0.996005654335022, "learning_rate": 8.089687604971448e-05, "loss": 1.0722, "step": 22960 }, { "epoch": 1.93, "grad_norm": 0.9320337176322937, "learning_rate": 8.088847833389318e-05, "loss": 1.0923, "step": 22970 }, { "epoch": 1.93, "grad_norm": 0.9646962881088257, "learning_rate": 8.088008061807188e-05, "loss": 1.088, "step": 22980 }, { "epoch": 1.93, "grad_norm": 0.9459730386734009, "learning_rate": 8.08716829022506e-05, "loss": 1.0723, "step": 22990 }, { "epoch": 1.93, "grad_norm": 1.0047719478607178, "learning_rate": 8.08632851864293e-05, "loss": 1.0615, "step": 23000 }, { "epoch": 1.93, "grad_norm": 1.0436536073684692, "learning_rate": 8.0854887470608e-05, "loss": 1.0672, "step": 23010 }, { "epoch": 1.93, "grad_norm": 0.9911855459213257, 
"learning_rate": 8.08464897547867e-05, "loss": 1.0861, "step": 23020 }, { "epoch": 1.93, "grad_norm": 0.9437386989593506, "learning_rate": 8.083809203896541e-05, "loss": 1.0601, "step": 23030 }, { "epoch": 1.93, "grad_norm": 0.9278973340988159, "learning_rate": 8.082969432314411e-05, "loss": 1.0567, "step": 23040 }, { "epoch": 1.93, "grad_norm": 0.962884783744812, "learning_rate": 8.082129660732281e-05, "loss": 1.0636, "step": 23050 }, { "epoch": 1.93, "grad_norm": 0.9525705575942993, "learning_rate": 8.081289889150151e-05, "loss": 1.0779, "step": 23060 }, { "epoch": 1.93, "grad_norm": 0.9483520984649658, "learning_rate": 8.080450117568021e-05, "loss": 1.0593, "step": 23070 }, { "epoch": 1.93, "grad_norm": 0.9891012907028198, "learning_rate": 8.079610345985892e-05, "loss": 1.0769, "step": 23080 }, { "epoch": 1.94, "grad_norm": 0.9191649556159973, "learning_rate": 8.078770574403762e-05, "loss": 1.0844, "step": 23090 }, { "epoch": 1.94, "grad_norm": 0.8751499652862549, "learning_rate": 8.077930802821633e-05, "loss": 1.063, "step": 23100 }, { "epoch": 1.94, "grad_norm": 0.9491960406303406, "learning_rate": 8.077091031239503e-05, "loss": 1.0772, "step": 23110 }, { "epoch": 1.94, "grad_norm": 0.8733270764350891, "learning_rate": 8.076251259657373e-05, "loss": 1.1049, "step": 23120 }, { "epoch": 1.94, "grad_norm": 0.9507169723510742, "learning_rate": 8.075411488075245e-05, "loss": 1.0715, "step": 23130 }, { "epoch": 1.94, "grad_norm": 0.9581748843193054, "learning_rate": 8.074571716493115e-05, "loss": 1.0776, "step": 23140 }, { "epoch": 1.94, "grad_norm": 0.9720478057861328, "learning_rate": 8.073731944910985e-05, "loss": 1.0703, "step": 23150 }, { "epoch": 1.94, "grad_norm": 0.9863296151161194, "learning_rate": 8.072892173328855e-05, "loss": 1.0729, "step": 23160 }, { "epoch": 1.94, "grad_norm": 0.9228429198265076, "learning_rate": 8.072052401746725e-05, "loss": 1.0739, "step": 23170 }, { "epoch": 1.94, "grad_norm": 0.95717453956604, "learning_rate": 
8.071212630164595e-05, "loss": 1.0908, "step": 23180 }, { "epoch": 1.94, "grad_norm": 0.926839292049408, "learning_rate": 8.070372858582465e-05, "loss": 1.0702, "step": 23190 }, { "epoch": 1.94, "grad_norm": 0.9715284705162048, "learning_rate": 8.069533087000336e-05, "loss": 1.1033, "step": 23200 }, { "epoch": 1.95, "grad_norm": 0.9391289949417114, "learning_rate": 8.068693315418207e-05, "loss": 1.0439, "step": 23210 }, { "epoch": 1.95, "grad_norm": 0.9625375270843506, "learning_rate": 8.067853543836077e-05, "loss": 1.0894, "step": 23220 }, { "epoch": 1.95, "grad_norm": 0.9507083892822266, "learning_rate": 8.067013772253948e-05, "loss": 1.0776, "step": 23230 }, { "epoch": 1.95, "grad_norm": 0.9163172841072083, "learning_rate": 8.066174000671818e-05, "loss": 1.049, "step": 23240 }, { "epoch": 1.95, "grad_norm": 0.9412251114845276, "learning_rate": 8.065334229089688e-05, "loss": 1.0854, "step": 23250 }, { "epoch": 1.95, "grad_norm": 0.9556521773338318, "learning_rate": 8.064494457507558e-05, "loss": 1.1001, "step": 23260 }, { "epoch": 1.95, "grad_norm": 0.9202659130096436, "learning_rate": 8.06365468592543e-05, "loss": 1.0694, "step": 23270 }, { "epoch": 1.95, "grad_norm": 0.9295778870582581, "learning_rate": 8.062814914343298e-05, "loss": 1.0734, "step": 23280 }, { "epoch": 1.95, "grad_norm": 0.9153093099594116, "learning_rate": 8.061975142761168e-05, "loss": 1.0742, "step": 23290 }, { "epoch": 1.95, "grad_norm": 0.9639344811439514, "learning_rate": 8.06113537117904e-05, "loss": 1.0628, "step": 23300 }, { "epoch": 1.95, "grad_norm": 0.92649245262146, "learning_rate": 8.06029559959691e-05, "loss": 1.0664, "step": 23310 }, { "epoch": 1.95, "grad_norm": 0.9394589066505432, "learning_rate": 8.05945582801478e-05, "loss": 1.0517, "step": 23320 }, { "epoch": 1.96, "grad_norm": 0.853520393371582, "learning_rate": 8.05861605643265e-05, "loss": 1.0742, "step": 23330 }, { "epoch": 1.96, "grad_norm": 0.8790643215179443, "learning_rate": 8.057776284850521e-05, "loss": 1.066, 
"step": 23340 }, { "epoch": 1.96, "grad_norm": 0.9281080365180969, "learning_rate": 8.056936513268392e-05, "loss": 1.0934, "step": 23350 }, { "epoch": 1.96, "grad_norm": 0.9617061614990234, "learning_rate": 8.056096741686262e-05, "loss": 1.0645, "step": 23360 }, { "epoch": 1.96, "grad_norm": 1.0057429075241089, "learning_rate": 8.055256970104133e-05, "loss": 1.0467, "step": 23370 }, { "epoch": 1.96, "grad_norm": 0.934522271156311, "learning_rate": 8.054417198522003e-05, "loss": 1.1074, "step": 23380 }, { "epoch": 1.96, "grad_norm": 0.8520189523696899, "learning_rate": 8.053577426939872e-05, "loss": 1.0724, "step": 23390 }, { "epoch": 1.96, "grad_norm": 0.9682310223579407, "learning_rate": 8.052737655357743e-05, "loss": 1.0724, "step": 23400 }, { "epoch": 1.96, "grad_norm": 0.9934664368629456, "learning_rate": 8.051897883775613e-05, "loss": 1.053, "step": 23410 }, { "epoch": 1.96, "grad_norm": 0.938259482383728, "learning_rate": 8.051058112193483e-05, "loss": 1.054, "step": 23420 }, { "epoch": 1.96, "grad_norm": 0.9081432819366455, "learning_rate": 8.050218340611353e-05, "loss": 1.0939, "step": 23430 }, { "epoch": 1.97, "grad_norm": 1.004728078842163, "learning_rate": 8.049378569029225e-05, "loss": 1.0719, "step": 23440 }, { "epoch": 1.97, "grad_norm": 1.04097580909729, "learning_rate": 8.048538797447095e-05, "loss": 1.0872, "step": 23450 }, { "epoch": 1.97, "grad_norm": 0.9404523372650146, "learning_rate": 8.047699025864965e-05, "loss": 1.0627, "step": 23460 }, { "epoch": 1.97, "grad_norm": 0.9675320982933044, "learning_rate": 8.046859254282836e-05, "loss": 1.0611, "step": 23470 }, { "epoch": 1.97, "grad_norm": 1.0386711359024048, "learning_rate": 8.046019482700707e-05, "loss": 1.0842, "step": 23480 }, { "epoch": 1.97, "grad_norm": 0.9149951934814453, "learning_rate": 8.045179711118575e-05, "loss": 1.0578, "step": 23490 }, { "epoch": 1.97, "grad_norm": 0.9119037389755249, "learning_rate": 8.044339939536447e-05, "loss": 1.1014, "step": 23500 }, { "epoch": 1.97, 
"grad_norm": 0.9563122987747192, "learning_rate": 8.043500167954317e-05, "loss": 1.068, "step": 23510 }, { "epoch": 1.97, "grad_norm": 0.9674993753433228, "learning_rate": 8.042660396372187e-05, "loss": 1.0653, "step": 23520 }, { "epoch": 1.97, "grad_norm": 0.9631643891334534, "learning_rate": 8.041820624790057e-05, "loss": 1.0811, "step": 23530 }, { "epoch": 1.97, "grad_norm": 0.9869856238365173, "learning_rate": 8.040980853207928e-05, "loss": 1.0715, "step": 23540 }, { "epoch": 1.97, "grad_norm": 0.9949793815612793, "learning_rate": 8.040141081625798e-05, "loss": 1.0768, "step": 23550 }, { "epoch": 1.98, "grad_norm": 1.0151121616363525, "learning_rate": 8.039301310043668e-05, "loss": 1.0648, "step": 23560 }, { "epoch": 1.98, "grad_norm": 0.9796632528305054, "learning_rate": 8.038461538461538e-05, "loss": 1.0791, "step": 23570 }, { "epoch": 1.98, "grad_norm": 0.9762797951698303, "learning_rate": 8.03762176687941e-05, "loss": 1.0559, "step": 23580 }, { "epoch": 1.98, "grad_norm": 0.948366641998291, "learning_rate": 8.03678199529728e-05, "loss": 1.0756, "step": 23590 }, { "epoch": 1.98, "grad_norm": 0.9642534255981445, "learning_rate": 8.03594222371515e-05, "loss": 1.0903, "step": 23600 }, { "epoch": 1.98, "grad_norm": 0.9076772332191467, "learning_rate": 8.03510245213302e-05, "loss": 1.0742, "step": 23610 }, { "epoch": 1.98, "grad_norm": 0.9649861454963684, "learning_rate": 8.03426268055089e-05, "loss": 1.1095, "step": 23620 }, { "epoch": 1.98, "grad_norm": 1.0080221891403198, "learning_rate": 8.03342290896876e-05, "loss": 1.0679, "step": 23630 }, { "epoch": 1.98, "grad_norm": 0.9600275754928589, "learning_rate": 8.032583137386632e-05, "loss": 1.0607, "step": 23640 }, { "epoch": 1.98, "grad_norm": 1.002281904220581, "learning_rate": 8.031743365804502e-05, "loss": 1.0769, "step": 23650 }, { "epoch": 1.98, "grad_norm": 0.9854843616485596, "learning_rate": 8.030903594222372e-05, "loss": 1.0536, "step": 23660 }, { "epoch": 1.98, "grad_norm": 0.9507874846458435, 
"learning_rate": 8.030063822640242e-05, "loss": 1.0959, "step": 23670 }, { "epoch": 1.99, "grad_norm": 0.9268907904624939, "learning_rate": 8.029224051058113e-05, "loss": 1.0862, "step": 23680 }, { "epoch": 1.99, "grad_norm": 0.9411166310310364, "learning_rate": 8.028384279475983e-05, "loss": 1.0633, "step": 23690 }, { "epoch": 1.99, "grad_norm": 1.055272102355957, "learning_rate": 8.027544507893852e-05, "loss": 1.0693, "step": 23700 }, { "epoch": 1.99, "grad_norm": 0.9390666484832764, "learning_rate": 8.026704736311724e-05, "loss": 1.071, "step": 23710 }, { "epoch": 1.99, "grad_norm": 0.902016282081604, "learning_rate": 8.025864964729594e-05, "loss": 1.0802, "step": 23720 }, { "epoch": 1.99, "grad_norm": 0.9374504089355469, "learning_rate": 8.025025193147464e-05, "loss": 1.0863, "step": 23730 }, { "epoch": 1.99, "grad_norm": 0.9502925276756287, "learning_rate": 8.024185421565335e-05, "loss": 1.0681, "step": 23740 }, { "epoch": 1.99, "grad_norm": 0.9141817092895508, "learning_rate": 8.023345649983205e-05, "loss": 1.0916, "step": 23750 }, { "epoch": 1.99, "grad_norm": 0.9485606551170349, "learning_rate": 8.022505878401075e-05, "loss": 1.0686, "step": 23760 }, { "epoch": 1.99, "grad_norm": 0.9022987484931946, "learning_rate": 8.021666106818945e-05, "loss": 1.0722, "step": 23770 }, { "epoch": 1.99, "grad_norm": 0.999480128288269, "learning_rate": 8.020826335236817e-05, "loss": 1.0986, "step": 23780 }, { "epoch": 1.99, "grad_norm": 0.994338870048523, "learning_rate": 8.019986563654687e-05, "loss": 1.0546, "step": 23790 }, { "epoch": 2.0, "grad_norm": 1.007479190826416, "learning_rate": 8.019146792072557e-05, "loss": 1.0625, "step": 23800 }, { "epoch": 2.0, "grad_norm": 0.9863058924674988, "learning_rate": 8.018307020490427e-05, "loss": 1.065, "step": 23810 }, { "epoch": 2.0, "grad_norm": 0.9091801047325134, "learning_rate": 8.017467248908297e-05, "loss": 1.0705, "step": 23820 }, { "epoch": 2.0, "grad_norm": 0.9525049924850464, "learning_rate": 8.016627477326167e-05, 
"loss": 1.0779, "step": 23830 }, { "epoch": 2.0, "grad_norm": 1.005071997642517, "learning_rate": 8.015787705744038e-05, "loss": 1.0597, "step": 23840 }, { "epoch": 2.0, "grad_norm": 0.9436593651771545, "learning_rate": 8.014947934161909e-05, "loss": 1.078, "step": 23850 }, { "epoch": 2.0, "eval_loss": 1.3766697645187378, "eval_runtime": 6244.9948, "eval_samples_per_second": 265.867, "eval_steps_per_second": 4.154, "step": 23857 } ], "logging_steps": 10, "max_steps": 119280, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "total_flos": 1.0174807412263879e+19, "train_batch_size": 6, "trial_name": null, "trial_params": null }