{ "best_global_step": 50000, "best_metric": 0.4009660835826073, "best_model_checkpoint": "./whisper-translate-bn-or\\checkpoint-50000", "epoch": 5.508839585315771, "eval_steps": 10000, "global_step": 55000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00010016527269995493, "grad_norm": 1.0699678659439087, "learning_rate": 0.0, "loss": 3.4658, "step": 1 }, { "epoch": 0.0010016527269995493, "grad_norm": 0.946382999420166, "learning_rate": 4.5e-06, "loss": 3.1873, "step": 10 }, { "epoch": 0.0020033054539990987, "grad_norm": 1.0159204006195068, "learning_rate": 9.5e-06, "loss": 3.3418, "step": 20 }, { "epoch": 0.0030049581809986478, "grad_norm": 0.963729202747345, "learning_rate": 1.45e-05, "loss": 3.2141, "step": 30 }, { "epoch": 0.004006610907998197, "grad_norm": 1.0494544506072998, "learning_rate": 1.9500000000000003e-05, "loss": 3.2094, "step": 40 }, { "epoch": 0.005008263634997746, "grad_norm": 1.1693147420883179, "learning_rate": 2.45e-05, "loss": 3.1064, "step": 50 }, { "epoch": 0.0060099163619972956, "grad_norm": 1.0876061916351318, "learning_rate": 2.95e-05, "loss": 2.8635, "step": 60 }, { "epoch": 0.007011569088996845, "grad_norm": 1.2751100063323975, "learning_rate": 3.45e-05, "loss": 2.7471, "step": 70 }, { "epoch": 0.008013221815996395, "grad_norm": 1.3546741008758545, "learning_rate": 3.9500000000000005e-05, "loss": 2.6155, "step": 80 }, { "epoch": 0.009014874542995942, "grad_norm": 1.6993346214294434, "learning_rate": 4.4500000000000004e-05, "loss": 2.5273, "step": 90 }, { "epoch": 0.010016527269995492, "grad_norm": 1.595080852508545, "learning_rate": 4.9500000000000004e-05, "loss": 2.413, "step": 100 }, { "epoch": 0.011018179996995042, "grad_norm": 1.627835750579834, "learning_rate": 4.9999995973874405e-05, "loss": 2.1941, "step": 110 }, { "epoch": 0.012019832723994591, "grad_norm": 1.5041255950927734, "learning_rate": 4.999998205640485e-05, "loss": 2.1507, "step": 120 }, { "epoch": 0.01302148545099414, "grad_norm": 1.970567226409912, "learning_rate": 4.999995819789164e-05, "loss": 2.0218, "step": 130 }, { "epoch": 0.01402313817799369, "grad_norm": 1.9568697214126587, "learning_rate": 4.999992439834424e-05, "loss": 1.6866, "step": 140 }, { "epoch": 0.015024790904993238, "grad_norm": 2.1707658767700195, "learning_rate": 4.99998806577761e-05, "loss": 1.6941, "step": 150 }, { "epoch": 0.01602644363199279, "grad_norm": 2.4342153072357178, "learning_rate": 4.999982697620461e-05, "loss": 1.7162, "step": 160 }, { "epoch": 0.01702809635899234, "grad_norm": 2.1355204582214355, "learning_rate": 4.9999763353651117e-05, "loss": 1.6787, "step": 170 }, { "epoch": 0.018029749085991885, "grad_norm": 2.1012508869171143, "learning_rate": 4.999968979014093e-05, "loss": 1.5662, "step": 180 }, { "epoch": 0.019031401812991434, "grad_norm": 1.8109233379364014, "learning_rate": 4.999960628570328e-05, "loss": 1.5796, "step": 190 }, { "epoch": 0.020033054539990984, "grad_norm": 2.47389554977417, "learning_rate": 4.9999512840371386e-05, "loss": 1.6256, "step": 200 }, { "epoch": 0.021034707266990534, "grad_norm": 1.9195002317428589, "learning_rate": 4.999940945418241e-05, "loss": 1.5094, "step": 210 }, { "epoch": 0.022036359993990083, "grad_norm": 2.2418041229248047, "learning_rate": 4.9999296127177453e-05, "loss": 1.5252, "step": 220 }, { "epoch": 0.023038012720989633, "grad_norm": 2.5955166816711426, "learning_rate": 4.999917285940158e-05, "loss": 1.5686, "step": 230 }, { "epoch": 0.024039665447989182, "grad_norm": 1.936550498008728, "learning_rate": 4.999903965090381e-05, "loss": 1.5285, "step": 240 }, { "epoch": 0.025041318174988732, "grad_norm": 1.894722819328308, "learning_rate": 4.9998896501737116e-05, "loss": 1.5413, "step": 250 }, { "epoch": 0.02604297090198828, "grad_norm": 2.192305326461792, "learning_rate": 4.99987434119584e-05, "loss": 1.5218, "step": 260 }, { "epoch": 0.02704462362898783, "grad_norm": 2.490818500518799, "learning_rate": 4.999858038162857e-05, "loss": 1.4895, "step": 270 }, { "epoch": 0.02804627635598738, "grad_norm": 2.2005741596221924, "learning_rate": 4.999840741081243e-05, "loss": 1.481, "step": 280 }, { "epoch": 0.02904792908298693, "grad_norm": 2.2879180908203125, "learning_rate": 4.999822449957876e-05, "loss": 1.3541, "step": 290 }, { "epoch": 0.030049581809986476, "grad_norm": 1.9570573568344116, "learning_rate": 4.999803164800031e-05, "loss": 1.4484, "step": 300 }, { "epoch": 0.031051234536986026, "grad_norm": 2.203392744064331, "learning_rate": 4.999782885615375e-05, "loss": 1.4385, "step": 310 }, { "epoch": 0.03205288726398558, "grad_norm": 2.16286563873291, "learning_rate": 4.999761612411972e-05, "loss": 1.4522, "step": 320 }, { "epoch": 0.03305453999098513, "grad_norm": 2.594715118408203, "learning_rate": 4.999739345198282e-05, "loss": 1.4169, "step": 330 }, { "epoch": 0.03405619271798468, "grad_norm": 2.793673515319824, "learning_rate": 4.999716083983159e-05, "loss": 1.525, "step": 340 }, { "epoch": 0.03505784544498423, "grad_norm": 2.3903636932373047, "learning_rate": 4.999691828775853e-05, "loss": 1.4475, "step": 350 }, { "epoch": 0.03605949817198377, "grad_norm": 2.7492942810058594, "learning_rate": 4.999666579586008e-05, "loss": 1.397, "step": 360 }, { "epoch": 0.03706115089898332, "grad_norm": 2.458517551422119, "learning_rate": 4.9996403364236645e-05, "loss": 1.3987, "step": 370 }, { "epoch": 0.03806280362598287, "grad_norm": 2.7841598987579346, "learning_rate": 4.999613099299258e-05, "loss": 1.3781, "step": 380 }, { "epoch": 0.03906445635298242, "grad_norm": 2.5349690914154053, "learning_rate": 4.9995848682236194e-05, "loss": 1.3281, "step": 390 }, { "epoch": 0.04006610907998197, "grad_norm": 2.858383893966675, "learning_rate": 4.999555643207974e-05, "loss": 1.3173, "step": 400 }, { "epoch": 0.04106776180698152, "grad_norm": 2.612161874771118, "learning_rate": 4.999525424263943e-05, "loss": 1.4123, "step": 410 }, { "epoch": 0.04206941453398107, "grad_norm": 2.9477760791778564, "learning_rate": 4.9994942114035435e-05, "loss": 1.3716, "step": 420 }, { "epoch": 0.04307106726098062, "grad_norm": 2.3722777366638184, "learning_rate": 4.999462004639186e-05, "loss": 1.3067, "step": 430 }, { "epoch": 0.044072719987980166, "grad_norm": 2.594200611114502, "learning_rate": 4.999428803983678e-05, "loss": 1.3702, "step": 440 }, { "epoch": 0.045074372714979716, "grad_norm": 2.670445203781128, "learning_rate": 4.99939460945022e-05, "loss": 1.3728, "step": 450 }, { "epoch": 0.046076025441979265, "grad_norm": 2.574885606765747, "learning_rate": 4.9993594210524115e-05, "loss": 1.351, "step": 460 }, { "epoch": 0.047077678168978815, "grad_norm": 2.1980414390563965, "learning_rate": 4.999323238804243e-05, "loss": 1.425, "step": 470 }, { "epoch": 0.048079330895978364, "grad_norm": 2.689714193344116, "learning_rate": 4.9992860627201034e-05, "loss": 1.3985, "step": 480 }, { "epoch": 0.049080983622977914, "grad_norm": 2.989649772644043, "learning_rate": 4.999247892814775e-05, "loss": 1.3723, "step": 490 }, { "epoch": 0.050082636349977463, "grad_norm": 2.6088790893554688, "learning_rate": 4.999208729103435e-05, "loss": 1.2535, "step": 500 }, { "epoch": 0.05108428907697701, "grad_norm": 2.829699754714966, "learning_rate": 4.999168571601658e-05, "loss": 1.2934, "step": 510 }, { "epoch": 0.05208594180397656, "grad_norm": 1.9769593477249146, "learning_rate": 4.999127420325411e-05, "loss": 1.1993, "step": 520 }, { "epoch": 0.05308759453097611, "grad_norm": 2.4946117401123047, "learning_rate": 4.9990852752910576e-05, "loss": 1.2418, "step": 530 }, { "epoch": 0.05408924725797566, "grad_norm": 2.7048685550689697, "learning_rate": 4.999042136515358e-05, "loss": 1.2653, "step": 540 }, { "epoch": 0.05509089998497521, "grad_norm": 2.877206802368164, "learning_rate": 4.998998004015464e-05, "loss": 1.2766, "step": 550 }, { "epoch": 0.05609255271197476, "grad_norm": 2.246899366378784, "learning_rate": 4.998952877808925e-05, "loss": 1.3626, "step": 560 }, { "epoch": 0.05709420543897431, "grad_norm": 2.2098586559295654, "learning_rate": 4.998906757913686e-05, "loss": 1.2818, "step": 570 }, { "epoch": 0.05809585816597386, "grad_norm": 3.2610485553741455, "learning_rate": 4.998859644348085e-05, "loss": 1.2254, "step": 580 }, { "epoch": 0.05909751089297341, "grad_norm": 3.1614911556243896, "learning_rate": 4.998811537130857e-05, "loss": 1.2474, "step": 590 }, { "epoch": 0.06009916361997295, "grad_norm": 2.5031402111053467, "learning_rate": 4.9987624362811324e-05, "loss": 1.1746, "step": 600 }, { "epoch": 0.0611008163469725, "grad_norm": 2.7153284549713135, "learning_rate": 4.9987123418184344e-05, "loss": 1.285, "step": 610 }, { "epoch": 0.06210246907397205, "grad_norm": 2.6688461303710938, "learning_rate": 4.998661253762683e-05, "loss": 1.2516, "step": 620 }, { "epoch": 0.06310412180097161, "grad_norm": 2.5687406063079834, "learning_rate": 4.998609172134192e-05, "loss": 1.1905, "step": 630 }, { "epoch": 0.06410577452797116, "grad_norm": 2.876481533050537, "learning_rate": 4.9985560969536736e-05, "loss": 1.192, "step": 640 }, { "epoch": 0.0651074272549707, "grad_norm": 3.5918831825256348, "learning_rate": 4.9985020282422304e-05, "loss": 1.3036, "step": 650 }, { "epoch": 0.06610907998197026, "grad_norm": 2.464890718460083, "learning_rate": 4.998446966021364e-05, "loss": 1.1751, "step": 660 }, { "epoch": 0.0671107327089698, "grad_norm": 2.5518927574157715, "learning_rate": 4.998390910312969e-05, "loss": 1.2133, "step": 670 }, { "epoch": 0.06811238543596936, "grad_norm": 2.709120512008667, "learning_rate": 4.998333861139335e-05, "loss": 1.2233, "step": 680 }, { "epoch": 0.0691140381629689, "grad_norm": 2.6371023654937744, "learning_rate": 4.998275818523147e-05, "loss": 1.1879, "step": 690 }, { "epoch": 0.07011569088996845, "grad_norm": 2.7895636558532715, "learning_rate": 4.998216782487486e-05, "loss": 1.1956, "step": 700 }, { "epoch": 0.071117343616968, "grad_norm": 2.9015183448791504, "learning_rate": 4.998156753055826e-05, "loss": 1.1716, "step": 710 }, { "epoch": 0.07211899634396754, "grad_norm": 3.0008327960968018, "learning_rate": 4.9980957302520396e-05, "loss": 1.2089, "step": 720 }, { "epoch": 0.07312064907096709, "grad_norm": 2.6571829319000244, "learning_rate": 4.9980337141003895e-05, "loss": 1.1451, "step": 730 }, { "epoch": 0.07412230179796664, "grad_norm": 2.932692766189575, "learning_rate": 4.9979707046255376e-05, "loss": 1.1582, "step": 740 }, { "epoch": 0.07512395452496619, "grad_norm": 2.8995957374572754, "learning_rate": 4.9979067018525383e-05, "loss": 1.1619, "step": 750 }, { "epoch": 0.07612560725196574, "grad_norm": 2.709153890609741, "learning_rate": 4.9978417058068414e-05, "loss": 1.202, "step": 760 }, { "epoch": 0.07712725997896529, "grad_norm": 2.4261250495910645, "learning_rate": 4.997775716514293e-05, "loss": 1.2184, "step": 770 }, { "epoch": 0.07812891270596484, "grad_norm": 2.8053817749023438, "learning_rate": 4.997708734001133e-05, "loss": 1.2656, "step": 780 }, { "epoch": 0.07913056543296439, "grad_norm": 2.360339879989624, "learning_rate": 4.997640758293996e-05, "loss": 1.1717, "step": 790 }, { "epoch": 0.08013221815996394, "grad_norm": 2.579951524734497, "learning_rate": 4.9975717894199126e-05, "loss": 1.2521, "step": 800 }, { "epoch": 0.08113387088696349, "grad_norm": 2.9729485511779785, "learning_rate": 4.997501827406307e-05, "loss": 1.1597, "step": 810 }, { "epoch": 0.08213552361396304, "grad_norm": 3.1533076763153076, "learning_rate": 4.997430872280999e-05, "loss": 1.1519, "step": 820 }, { "epoch": 0.08313717634096258, "grad_norm": 4.105657577514648, "learning_rate": 4.997358924072205e-05, "loss": 1.1856, "step": 830 }, { "epoch": 0.08413882906796213, "grad_norm": 2.565356731414795, "learning_rate": 4.997285982808533e-05, "loss": 1.232, "step": 840 }, { "epoch": 0.08514048179496168, "grad_norm": 3.074532985687256, "learning_rate": 4.9972120485189874e-05, "loss": 1.1723, "step": 850 }, { "epoch": 0.08614213452196123, "grad_norm": 2.80393123626709, "learning_rate": 4.997137121232969e-05, "loss": 1.2643, "step": 860 }, { "epoch": 0.08714378724896078, "grad_norm": 2.783311128616333, "learning_rate": 4.99706120098027e-05, "loss": 1.1941, "step": 870 }, { "epoch": 0.08814543997596033, "grad_norm": 2.6996560096740723, "learning_rate": 4.9969842877910814e-05, "loss": 1.18, "step": 880 }, { "epoch": 0.08914709270295988, "grad_norm": 3.0572733879089355, "learning_rate": 4.996906381695986e-05, "loss": 1.1936, "step": 890 }, { "epoch": 0.09014874542995943, "grad_norm": 2.5547292232513428, "learning_rate": 4.996827482725963e-05, "loss": 1.2188, "step": 900 }, { "epoch": 0.09115039815695898, "grad_norm": 2.564838171005249, "learning_rate": 4.996747590912386e-05, "loss": 1.1149, "step": 910 }, { "epoch": 0.09215205088395853, "grad_norm": 2.6100144386291504, "learning_rate": 4.9966667062870235e-05, "loss": 1.1286, "step": 920 }, { "epoch": 0.09315370361095808, "grad_norm": 2.742168664932251, "learning_rate": 4.996584828882038e-05, "loss": 1.1574, "step": 930 }, { "epoch": 0.09415535633795763, "grad_norm": 2.3155078887939453, "learning_rate": 4.9965019587299874e-05, "loss": 1.0794, "step": 940 }, { "epoch": 0.09515700906495718, "grad_norm": 3.218953847885132, "learning_rate": 4.996418095863824e-05, "loss": 1.1358, "step": 950 }, { "epoch": 0.09615866179195673, "grad_norm": 2.9602880477905273, "learning_rate": 4.996333240316897e-05, "loss": 1.1506, "step": 960 }, { "epoch": 0.09716031451895628, "grad_norm": 2.4367001056671143, "learning_rate": 4.996247392122948e-05, "loss": 1.1119, "step": 970 }, { "epoch": 0.09816196724595583, "grad_norm": 2.4974889755249023, "learning_rate": 4.996160551316112e-05, "loss": 1.1244, "step": 980 }, { "epoch": 0.09916361997295538, "grad_norm": 3.0747549533843994, "learning_rate": 4.9960727179309216e-05, "loss": 1.1237, "step": 990 }, { "epoch": 0.10016527269995493, "grad_norm": 3.293057441711426, "learning_rate": 4.9959838920023036e-05, "loss": 1.1125, "step": 1000 }, { "epoch": 0.10116692542695448, "grad_norm": 3.2793030738830566, "learning_rate": 4.995894073565579e-05, "loss": 1.1349, "step": 1010 }, { "epoch": 0.10216857815395403, "grad_norm": 2.5710489749908447, "learning_rate": 4.995803262656463e-05, "loss": 1.1671, "step": 1020 }, { "epoch": 0.10317023088095358, "grad_norm": 2.81728458404541, "learning_rate": 4.995711459311065e-05, "loss": 1.098, "step": 1030 }, { "epoch": 0.10417188360795313, "grad_norm": 2.4683609008789062, "learning_rate": 4.995618663565892e-05, "loss": 1.0697, "step": 1040 }, { "epoch": 0.10517353633495267, "grad_norm": 2.7064619064331055, "learning_rate": 4.995524875457841e-05, "loss": 1.1379, "step": 1050 }, { "epoch": 0.10617518906195222, "grad_norm": 2.810743570327759, "learning_rate": 4.995430095024207e-05, "loss": 1.1032, "step": 1060 }, { "epoch": 0.10717684178895177, "grad_norm": 2.8417282104492188, "learning_rate": 4.9953343223026796e-05, "loss": 1.1375, "step": 1070 }, { "epoch": 0.10817849451595132, "grad_norm": 2.6498005390167236, "learning_rate": 4.9952375573313415e-05, "loss": 1.0805, "step": 1080 }, { "epoch": 0.10918014724295087, "grad_norm": 2.90468692779541, "learning_rate": 4.9951398001486704e-05, "loss": 1.1367, "step": 1090 }, { "epoch": 0.11018179996995042, "grad_norm": 2.9180171489715576, "learning_rate": 4.995041050793538e-05, "loss": 1.0368, "step": 1100 }, { "epoch": 0.11118345269694997, "grad_norm": 2.695122480392456, "learning_rate": 4.994941309305212e-05, "loss": 1.1418, "step": 1110 }, { "epoch": 0.11218510542394952, "grad_norm": 2.9390275478363037, "learning_rate": 4.994840575723354e-05, "loss": 1.1555, "step": 1120 }, { "epoch": 0.11318675815094907, "grad_norm": 3.146312713623047, "learning_rate": 4.99473885008802e-05, "loss": 1.105, "step": 1130 }, { "epoch": 0.11418841087794862, "grad_norm": 2.8160715103149414, "learning_rate": 4.99463613243966e-05, "loss": 1.0991, "step": 1140 }, { "epoch": 0.11519006360494817, "grad_norm": 2.578988790512085, "learning_rate": 4.994532422819117e-05, "loss": 1.0957, "step": 1150 }, { "epoch": 0.11619171633194772, "grad_norm": 2.783162832260132, "learning_rate": 4.994427721267634e-05, "loss": 1.0774, "step": 1160 }, { "epoch": 0.11719336905894727, "grad_norm": 2.6223232746124268, "learning_rate": 4.9943220278268424e-05, "loss": 1.1457, "step": 1170 }, { "epoch": 0.11819502178594682, "grad_norm": 3.61311411857605, "learning_rate": 4.994215342538771e-05, "loss": 1.1417, "step": 1180 }, { "epoch": 0.11919667451294637, "grad_norm": 2.6800315380096436, "learning_rate": 4.994107665445841e-05, "loss": 1.1383, "step": 1190 }, { "epoch": 0.1201983272399459, "grad_norm": 2.0521180629730225, "learning_rate": 4.993998996590872e-05, "loss": 0.992, "step": 1200 }, { "epoch": 0.12119997996694545, "grad_norm": 3.1667087078094482, "learning_rate": 4.9938893360170726e-05, "loss": 1.0216, "step": 1210 }, { "epoch": 0.122201632693945, "grad_norm": 3.2581300735473633, "learning_rate": 4.99377868376805e-05, "loss": 1.0763, "step": 1220 }, { "epoch": 0.12320328542094455, "grad_norm": 2.8601009845733643, "learning_rate": 4.993667039887804e-05, "loss": 1.1149, "step": 1230 }, { "epoch": 0.1242049381479441, "grad_norm": 2.900925636291504, "learning_rate": 4.99355440442073e-05, "loss": 1.0522, "step": 1240 }, { "epoch": 0.12520659087494365, "grad_norm": 2.9990346431732178, "learning_rate": 4.993440777411613e-05, "loss": 1.0632, "step": 1250 }, { "epoch": 0.12620824360194322, "grad_norm": 2.717073917388916, "learning_rate": 4.99332615890564e-05, "loss": 1.0612, "step": 1260 }, { "epoch": 0.12720989632894275, "grad_norm": 3.6020913124084473, "learning_rate": 4.993210548948385e-05, "loss": 0.9953, "step": 1270 }, { "epoch": 0.12821154905594231, "grad_norm": 2.9735605716705322, "learning_rate": 4.993093947585822e-05, "loss": 1.1802, "step": 1280 }, { "epoch": 0.12921320178294185, "grad_norm": 2.907518148422241, "learning_rate": 4.9929763548643145e-05, "loss": 1.1365, "step": 1290 }, { "epoch": 0.1302148545099414, "grad_norm": 2.708824396133423, "learning_rate": 4.9928577708306235e-05, "loss": 1.0376, "step": 1300 }, { "epoch": 0.13121650723694095, "grad_norm": 3.0437746047973633, "learning_rate": 4.992738195531902e-05, "loss": 1.1522, "step": 1310 }, { "epoch": 0.1322181599639405, "grad_norm": 3.0373198986053467, "learning_rate": 4.9926176290157e-05, "loss": 1.0515, "step": 1320 }, { "epoch": 0.13321981269094005, "grad_norm": 2.457960367202759, "learning_rate": 4.9924960713299575e-05, "loss": 1.0844, "step": 1330 }, { "epoch": 0.1342214654179396, "grad_norm": 3.0508358478546143, "learning_rate": 4.9923735225230127e-05, "loss": 1.0746, "step": 1340 }, { "epoch": 0.13522311814493915, "grad_norm": 2.9356350898742676, "learning_rate": 4.992249982643595e-05, "loss": 1.1505, "step": 1350 }, { "epoch": 0.1362247708719387, "grad_norm": 3.3241400718688965, "learning_rate": 4.99212545174083e-05, "loss": 1.1369, "step": 1360 }, { "epoch": 0.13722642359893825, "grad_norm": 3.232041120529175, "learning_rate": 4.9919999298642364e-05, "loss": 0.9972, "step": 1370 }, { "epoch": 0.1382280763259378, "grad_norm": 2.7437665462493896, "learning_rate": 4.991873417063726e-05, "loss": 1.0838, "step": 1380 }, { "epoch": 0.13922972905293735, "grad_norm": 2.6859841346740723, "learning_rate": 4.991745913389606e-05, "loss": 1.0474, "step": 1390 }, { "epoch": 0.1402313817799369, "grad_norm": 2.495213508605957, "learning_rate": 4.9916174188925776e-05, "loss": 1.0574, "step": 1400 }, { "epoch": 0.14123303450693644, "grad_norm": 2.9001944065093994, "learning_rate": 4.991487933623736e-05, "loss": 0.9758, "step": 1410 }, { "epoch": 0.142234687233936, "grad_norm": 2.9604761600494385, "learning_rate": 4.991357457634569e-05, "loss": 1.0665, "step": 1420 }, { "epoch": 0.14323633996093554, "grad_norm": 2.8431453704833984, "learning_rate": 4.9912259909769595e-05, "loss": 1.1466, "step": 1430 }, { "epoch": 0.14423799268793508, "grad_norm": 3.013521909713745, "learning_rate": 4.991093533703184e-05, "loss": 1.0455, "step": 1440 }, { "epoch": 0.14523964541493464, "grad_norm": 3.180718421936035, "learning_rate": 4.9909600858659146e-05, "loss": 1.1061, "step": 1450 }, { "epoch": 0.14624129814193418, "grad_norm": 2.9873087406158447, "learning_rate": 4.9908256475182133e-05, "loss": 0.9961, "step": 1460 }, { "epoch": 0.14724295086893374, "grad_norm": 2.9176833629608154, "learning_rate": 4.990690218713541e-05, "loss": 1.0316, "step": 1470 }, { "epoch": 0.14824460359593328, "grad_norm": 2.420295476913452, "learning_rate": 4.990553799505748e-05, "loss": 1.03, "step": 1480 }, { "epoch": 0.14924625632293284, "grad_norm": 2.4549472332000732, "learning_rate": 4.990416389949081e-05, "loss": 1.0582, "step": 1490 }, { "epoch": 0.15024790904993238, "grad_norm": 3.2612345218658447, "learning_rate": 4.99027799009818e-05, "loss": 1.0485, "step": 1500 }, { "epoch": 0.15124956177693194, "grad_norm": 2.7688887119293213, "learning_rate": 4.9901386000080785e-05, "loss": 1.0013, "step": 1510 }, { "epoch": 0.15225121450393148, "grad_norm": 2.7964026927948, "learning_rate": 4.989998219734203e-05, "loss": 0.9652, "step": 1520 }, { "epoch": 0.15325286723093104, "grad_norm": 2.564159393310547, "learning_rate": 4.989856849332376e-05, "loss": 0.9539, "step": 1530 }, { "epoch": 0.15425451995793057, "grad_norm": 2.9798643589019775, "learning_rate": 4.989714488858812e-05, "loss": 1.0204, "step": 1540 }, { "epoch": 0.15525617268493014, "grad_norm": 3.4313089847564697, "learning_rate": 4.989571138370118e-05, "loss": 0.9848, "step": 1550 }, { "epoch": 0.15625782541192967, "grad_norm": 3.4639475345611572, "learning_rate": 4.989426797923299e-05, "loss": 1.0276, "step": 1560 }, { "epoch": 0.15725947813892924, "grad_norm": 2.630809783935547, "learning_rate": 4.989281467575748e-05, "loss": 1.0028, "step": 1570 }, { "epoch": 0.15826113086592877, "grad_norm": 3.15830397605896, "learning_rate": 4.989135147385255e-05, "loss": 0.9953, "step": 1580 }, { "epoch": 0.15926278359292834, "grad_norm": 2.3013908863067627, "learning_rate": 4.9889878374100054e-05, "loss": 0.9445, "step": 1590 }, { "epoch": 0.16026443631992787, "grad_norm": 2.9990906715393066, "learning_rate": 4.9888395377085734e-05, "loss": 1.0629, "step": 1600 }, { "epoch": 0.16126608904692744, "grad_norm": 2.9761180877685547, "learning_rate": 4.98869024833993e-05, "loss": 1.0075, "step": 1610 }, { "epoch": 0.16226774177392697, "grad_norm": 2.7043395042419434, "learning_rate": 4.9885399693634385e-05, "loss": 1.0862, "step": 1620 }, { "epoch": 0.16326939450092653, "grad_norm": 2.7392871379852295, "learning_rate": 4.988388700838856e-05, "loss": 1.0467, "step": 1630 }, { "epoch": 0.16427104722792607, "grad_norm": 2.6225688457489014, "learning_rate": 4.988236442826335e-05, "loss": 1.0083, "step": 1640 }, { "epoch": 0.16527269995492563, "grad_norm": 2.732118606567383, "learning_rate": 4.988083195386418e-05, "loss": 1.0408, "step": 1650 }, { "epoch": 0.16627435268192517, "grad_norm": 3.136059284210205, "learning_rate": 4.987928958580043e-05, "loss": 1.0722, "step": 1660 }, { "epoch": 0.16727600540892473, "grad_norm": 3.0781443119049072, "learning_rate": 4.987773732468541e-05, "loss": 1.0305, "step": 1670 }, { "epoch": 0.16827765813592427, "grad_norm": 2.469252824783325, "learning_rate": 4.9876175171136366e-05, "loss": 1.0357, "step": 1680 }, { "epoch": 0.16927931086292383, "grad_norm": 2.5065886974334717, "learning_rate": 4.9874603125774465e-05, "loss": 0.9225, "step": 1690 }, { "epoch": 0.17028096358992337, "grad_norm": 2.7271981239318848, "learning_rate": 4.987302118922484e-05, "loss": 0.9579, "step": 1700 }, { "epoch": 0.17128261631692293, "grad_norm": 3.1236214637756348, "learning_rate": 4.9871429362116517e-05, "loss": 0.9902, "step": 1710 }, { "epoch": 0.17228426904392247, "grad_norm": 2.5821967124938965, "learning_rate": 4.986982764508248e-05, "loss": 0.9573, "step": 1720 }, { "epoch": 0.17328592177092203, "grad_norm": 2.606112480163574, "learning_rate": 4.986821603875964e-05, "loss": 1.0657, "step": 1730 }, { "epoch": 0.17428757449792157, "grad_norm": 2.6190571784973145, "learning_rate": 4.986659454378885e-05, "loss": 1.0047, "step": 1740 }, { "epoch": 0.17528922722492113, "grad_norm": 2.8123345375061035, "learning_rate": 4.986496316081486e-05, "loss": 0.994, "step": 1750 }, { "epoch": 0.17629087995192066, "grad_norm": 2.665658712387085, "learning_rate": 4.9863321890486386e-05, "loss": 0.956, "step": 1760 }, { "epoch": 0.17729253267892023, "grad_norm": 2.5470478534698486, "learning_rate": 4.986167073345608e-05, "loss": 1.0184, "step": 1770 }, { "epoch": 0.17829418540591976, "grad_norm": 3.3314783573150635, "learning_rate": 4.986000969038049e-05, "loss": 0.9555, "step": 1780 }, { "epoch": 0.17929583813291933, "grad_norm": 3.4173240661621094, "learning_rate": 4.985833876192013e-05, "loss": 0.9164, "step": 1790 }, { "epoch": 0.18029749085991886, "grad_norm": 3.0542538166046143, "learning_rate": 4.985665794873944e-05, "loss": 0.992, "step": 1800 }, { "epoch": 0.18129914358691843, "grad_norm": 3.4689574241638184, "learning_rate": 4.9854967251506755e-05, "loss": 1.075, "step": 1810 }, { "epoch": 0.18230079631391796, "grad_norm": 2.872692346572876, "learning_rate": 4.98532666708944e-05, "loss": 1.0528, "step": 1820 }, { "epoch": 0.18330244904091753, "grad_norm": 2.7932639122009277, "learning_rate": 4.9851556207578565e-05, "loss": 0.9843, "step": 1830 }, { "epoch": 0.18430410176791706, "grad_norm": 2.615530014038086, "learning_rate": 4.9849835862239426e-05, "loss": 0.8914, "step": 1840 }, { "epoch": 0.18530575449491662, "grad_norm": 2.508965253829956, "learning_rate": 4.984810563556106e-05, "loss": 1.0039, "step": 1850 }, { "epoch": 0.18630740722191616, "grad_norm": 2.9795517921447754, "learning_rate": 4.9846365528231466e-05, "loss": 0.9747, "step": 1860 }, { "epoch": 0.18730905994891572, "grad_norm": 3.470461845397949, "learning_rate": 4.9844615540942584e-05, "loss": 0.9412, "step": 1870 }, { "epoch": 0.18831071267591526, "grad_norm": 3.2644646167755127, "learning_rate": 4.9842855674390306e-05, "loss": 0.9621, "step": 1880 }, { "epoch": 0.18931236540291482, "grad_norm": 2.799272298812866, "learning_rate": 4.9841085929274404e-05, "loss": 1.0094, "step": 1890 }, { "epoch": 0.19031401812991436, "grad_norm": 2.402174949645996, "learning_rate": 4.983930630629862e-05, "loss": 0.9504, "step": 1900 }, { "epoch": 0.1913156708569139, "grad_norm": 3.4706664085388184, "learning_rate": 4.983751680617059e-05, "loss": 0.9323, "step": 1910 }, { "epoch": 0.19231732358391346, "grad_norm": 3.1237800121307373, "learning_rate": 4.9835717429601905e-05, "loss": 0.9726, "step": 1920 }, { "epoch": 0.193318976310913, "grad_norm": 2.986846446990967, "learning_rate": 4.983390817730808e-05, "loss": 0.9088, "step": 1930 }, { "epoch": 0.19432062903791256, "grad_norm": 2.4418344497680664, "learning_rate": 4.983208905000854e-05, "loss": 1.0038, "step": 1940 }, { "epoch": 0.1953222817649121, "grad_norm": 3.0399439334869385, "learning_rate": 4.9830260048426636e-05, "loss": 1.0384, "step": 1950 }, { "epoch": 0.19632393449191166, "grad_norm": 2.5777769088745117, "learning_rate": 4.9828421173289676e-05, "loss": 0.9796, "step": 1960 }, { "epoch": 0.1973255872189112, "grad_norm": 3.4346256256103516, "learning_rate": 4.982657242532886e-05, "loss": 0.99, "step": 1970 }, { "epoch": 0.19832723994591075, "grad_norm": 2.746309518814087, "learning_rate": 4.982471380527934e-05, "loss": 0.9916, "step": 1980 }, { "epoch": 0.1993288926729103, "grad_norm": 2.7143187522888184, "learning_rate": 4.982284531388017e-05, "loss": 0.9572, "step": 1990 }, { "epoch": 0.20033054539990985, "grad_norm": 2.336798667907715, "learning_rate": 4.9820966951874345e-05, "loss": 0.9508, "step": 2000 }, { "epoch": 0.2013321981269094, "grad_norm": 2.9657061100006104, "learning_rate": 4.981907872000878e-05, "loss": 1.0059, "step": 2010 }, { "epoch": 0.20233385085390895, "grad_norm": 2.5726466178894043, "learning_rate": 4.9817180619034324e-05, "loss": 0.9964, "step": 2020 }, { "epoch": 0.2033355035809085, "grad_norm": 2.542393207550049, "learning_rate": 4.9815272649705733e-05, "loss": 0.9314, "step": 2030 }, { "epoch": 0.20433715630790805, "grad_norm": 2.635509729385376, "learning_rate": 4.98133548127817e-05, "loss": 0.9909, "step": 2040 }, { "epoch": 0.2053388090349076, "grad_norm": 2.7643823623657227, "learning_rate": 4.981142710902482e-05, "loss": 0.9538, "step": 2050 }, { "epoch": 0.20634046176190715, "grad_norm": 2.9941859245300293, "learning_rate": 4.980948953920166e-05, "loss": 1.0004, "step": 2060 }, { "epoch": 0.2073421144889067, "grad_norm": 2.7872562408447266, "learning_rate": 4.980754210408266e-05, "loss": 0.9699, "step": 2070 }, { "epoch": 0.20834376721590625, "grad_norm": 4.475282192230225, "learning_rate": 4.98055848044422e-05, "loss": 1.0183, "step": 2080 }, { "epoch": 0.20934541994290579, "grad_norm": 2.9876139163970947, "learning_rate": 4.980361764105859e-05, "loss": 0.9116, "step": 2090 }, { "epoch": 0.21034707266990535, "grad_norm": 3.074251890182495, "learning_rate": 4.980164061471405e-05, "loss": 0.9749, "step": 2100 }, { "epoch": 0.21134872539690489, "grad_norm": 3.2771337032318115, "learning_rate": 4.979965372619475e-05, "loss": 0.9717, "step": 2110 }, { "epoch": 0.21235037812390445, "grad_norm": 3.6136999130249023, "learning_rate": 4.9797656976290736e-05, "loss": 0.95, "step": 2120 }, { "epoch": 0.21335203085090398, "grad_norm": 3.1407272815704346, "learning_rate": 4.979565036579601e-05, "loss": 0.9831, "step": 2130 }, { "epoch": 0.21435368357790355, "grad_norm": 2.501585006713867, "learning_rate": 4.979363389550849e-05, "loss": 0.9725, "step": 2140 }, { "epoch": 0.21535533630490308, "grad_norm": 2.292156219482422, "learning_rate": 4.979160756623e-05, "loss": 0.8937, "step": 2150 }, { "epoch": 0.21635698903190265, "grad_norm": 2.627570629119873, "learning_rate": 4.978957137876629e-05, "loss": 0.9211, "step": 2160 }, { "epoch": 0.21735864175890218, "grad_norm": 3.1033174991607666, "learning_rate": 4.978752533392705e-05, "loss": 0.9802, "step": 2170 }, { "epoch": 0.21836029448590175, "grad_norm": 2.952439069747925, "learning_rate": 4.978546943252586e-05, "loss": 0.8931, "step": 2180 }, { "epoch": 0.21936194721290128, "grad_norm": 2.534576416015625, "learning_rate": 4.978340367538023e-05, "loss": 1.0088, "step": 2190 }, { "epoch": 0.22036359993990084, "grad_norm": 2.862668752670288, "learning_rate": 4.9781328063311614e-05, "loss": 0.9563, "step": 2200 }, { "epoch": 0.22136525266690038, "grad_norm": 2.849451780319214, "learning_rate": 4.977924259714534e-05, "loss": 0.986, "step": 2210 }, { "epoch": 0.22236690539389994, "grad_norm": 2.9716901779174805, "learning_rate": 4.977714727771069e-05, "loss": 0.8527, "step": 2220 }, { "epoch": 0.22336855812089948, "grad_norm": 2.634556770324707, "learning_rate": 4.977504210584084e-05, "loss": 0.9426, "step": 2230 }, { "epoch": 0.22437021084789904, "grad_norm": 2.3726024627685547, "learning_rate": 4.97729270823729e-05, "loss": 0.9265, "step": 2240 }, { "epoch": 0.22537186357489858, "grad_norm": 3.005441665649414, "learning_rate": 4.977080220814789e-05, "loss": 0.919, "step": 2250 }, { "epoch": 0.22637351630189814, "grad_norm": 3.0273005962371826, "learning_rate": 4.976866748401075e-05, "loss": 0.9681, "step": 2260 }, { "epoch": 0.22737516902889768, "grad_norm": 2.832097291946411, "learning_rate": 4.976652291081035e-05, "loss": 0.9547, "step": 2270 }, { "epoch": 0.22837682175589724, "grad_norm": 2.40688157081604, "learning_rate": 4.976436848939944e-05, "loss": 0.949, "step": 2280 }, { "epoch": 0.22937847448289678, "grad_norm": 2.177398681640625, "learning_rate": 4.976220422063473e-05, "loss": 0.9046, "step": 2290 }, { "epoch": 0.23038012720989634, "grad_norm": 2.506649971008301, "learning_rate": 4.97600301053768e-05, "loss": 0.9084, "step": 2300 }, { "epoch": 0.23138177993689588, "grad_norm": 2.7216312885284424, "learning_rate": 4.975784614449019e-05, "loss": 0.8898, "step": 2310 }, { "epoch": 0.23238343266389544, "grad_norm": 2.622244358062744, "learning_rate": 4.975565233884333e-05, "loss": 0.9973, "step": 2320 }, { "epoch": 0.23338508539089498, "grad_norm": 2.5111606121063232, "learning_rate": 4.9753448689308557e-05, "loss": 0.9258, "step": 2330 }, { "epoch": 0.23438673811789454, "grad_norm": 3.298383951187134, "learning_rate": 4.975123519676215e-05, "loss": 0.9868, "step": 2340 }, { "epoch": 0.23538839084489407, "grad_norm": 3.6510188579559326, "learning_rate": 4.974901186208428e-05, "loss": 0.9603, "step": 2350 }, { "epoch": 0.23639004357189364, "grad_norm": 2.3483967781066895, "learning_rate": 4.9746778686159045e-05, "loss": 0.8984, "step": 2360 }, { "epoch": 0.23739169629889317, "grad_norm": 2.734997510910034, "learning_rate": 4.9744535669874435e-05, "loss": 0.9412, "step": 2370 }, { "epoch": 0.23839334902589274, "grad_norm": 2.5782012939453125, "learning_rate": 4.974228281412239e-05, "loss": 1.002, "step": 2380 }, { "epoch": 0.23939500175289227, "grad_norm": 2.8419687747955322, "learning_rate": 4.974002011979871e-05, "loss": 0.9378, "step": 2390 }, { "epoch": 0.2403966544798918, "grad_norm": 2.9137983322143555, "learning_rate": 4.973774758780316e-05, "loss": 0.9227, "step": 2400 }, { "epoch": 0.24139830720689137, "grad_norm": 2.580446481704712, "learning_rate": 4.9735465219039396e-05, "loss": 0.9542, "step": 2410 }, { "epoch": 0.2423999599338909, "grad_norm": 3.0914855003356934, "learning_rate": 4.973317301441497e-05, "loss": 0.9186, "step": 2420 }, { "epoch": 0.24340161266089047, "grad_norm": 2.856234073638916, "learning_rate": 4.973087097484136e-05, "loss": 0.9466, "step": 2430 }, { "epoch": 0.24440326538789, "grad_norm": 3.1362180709838867, "learning_rate": 4.972855910123397e-05, "loss": 0.8701, "step": 2440 }, { "epoch": 0.24540491811488957, "grad_norm": 2.7053310871124268, "learning_rate": 4.972623739451208e-05, "loss": 0.9137, "step": 2450 }, { "epoch": 0.2464065708418891, "grad_norm": 2.5193698406219482, "learning_rate": 4.9723905855598904e-05, "loss": 0.8837, "step": 2460 }, { "epoch": 0.24740822356888867, "grad_norm": 2.841562509536743, "learning_rate": 4.972156448542156e-05, "loss": 0.9857, "step": 2470 }, { "epoch": 0.2484098762958882, "grad_norm": 2.9049248695373535, "learning_rate": 4.9719213284911084e-05, "loss": 0.947, "step": 2480 }, { "epoch": 0.24941152902288777, "grad_norm": 2.958031415939331, "learning_rate": 4.971685225500239e-05, "loss": 0.9208, "step": 2490 }, { "epoch": 0.2504131817498873, "grad_norm": 2.562629222869873, "learning_rate": 4.9714481396634346e-05, "loss": 0.8968, "step": 2500 }, { "epoch": 0.25141483447688684, "grad_norm": 2.949819803237915, "learning_rate": 4.97121007107497e-05, "loss": 0.856, "step": 2510 }, { "epoch": 0.25241648720388643, "grad_norm": 3.606531858444214, "learning_rate": 4.97097101982951e-05, "loss": 0.9165, "step": 2520 }, { "epoch": 0.25341813993088597, "grad_norm": 3.1925840377807617, "learning_rate": 4.970730986022113e-05, "loss": 0.9481, "step": 2530 }, { "epoch": 0.2544197926578855, "grad_norm": 3.001221179962158, "learning_rate": 4.9704899697482256e-05, "loss": 0.9386, "step": 2540 }, { "epoch": 0.25542144538488504, "grad_norm": 3.6826331615448, "learning_rate": 4.9702479711036864e-05, "loss": 0.8899, "step": 2550 }, { "epoch": 0.25642309811188463, "grad_norm": 3.200007200241089, "learning_rate": 4.970004990184724e-05, "loss": 0.8934, "step": 2560 }, { "epoch": 0.25742475083888416, "grad_norm": 2.6780619621276855, "learning_rate": 4.969761027087957e-05, "loss": 0.8705, "step": 2570 }, { "epoch": 0.2584264035658837, "grad_norm": 2.7149860858917236, "learning_rate": 4.969516081910397e-05, "loss": 0.9574, "step": 2580 }, { "epoch": 0.25942805629288324, "grad_norm": 2.5190060138702393, "learning_rate": 4.969270154749444e-05, "loss": 0.9411, "step": 2590 }, { "epoch": 0.2604297090198828, "grad_norm": 2.75197696685791, "learning_rate": 4.9690232457028887e-05, "loss": 0.9248, "step": 2600 }, { "epoch": 0.26143136174688236, "grad_norm": 2.462170362472534, "learning_rate": 4.968775354868912e-05, "loss": 0.9209, "step": 2610 }, { "epoch": 0.2624330144738819, "grad_norm": 2.7664783000946045, "learning_rate": 4.9685264823460866e-05, "loss": 0.8915, "step": 2620 }, { "epoch": 0.26343466720088143, "grad_norm": 2.3150320053100586, "learning_rate": 4.968276628233374e-05, "loss": 0.9826, "step": 2630 }, { "epoch": 0.264436319927881, "grad_norm": 2.6080338954925537, "learning_rate": 4.9680257926301274e-05, "loss": 0.8408, "step": 2640 }, { "epoch": 0.26543797265488056, "grad_norm": 3.5915043354034424, "learning_rate": 4.967773975636088e-05, "loss": 0.844, "step": 2650 }, { "epoch": 0.2664396253818801, "grad_norm": 2.8301284313201904, "learning_rate": 4.967521177351392e-05, "loss": 0.9263, "step": 2660 }, { "epoch": 0.26744127810887963, "grad_norm": 2.7187178134918213, "learning_rate": 4.9672673978765594e-05, "loss": 0.8942, "step": 2670 }, { "epoch": 0.2684429308358792, "grad_norm": 2.7909884452819824, "learning_rate": 4.9670126373125056e-05, "loss": 0.9328, "step": 2680 }, { "epoch": 0.26944458356287876, "grad_norm": 2.38917875289917, "learning_rate": 4.9667568957605324e-05, "loss": 0.9005, "step": 2690 }, { "epoch": 0.2704462362898783, "grad_norm": 2.494300127029419, "learning_rate": 4.966500173322335e-05, "loss": 0.8757, "step": 2700 }, { "epoch": 0.27144788901687783, "grad_norm": 2.545513868331909, "learning_rate": 4.966242470099997e-05, "loss": 0.9322, "step": 2710 }, { "epoch": 0.2724495417438774, "grad_norm": 2.4247043132781982, "learning_rate": 4.96598378619599e-05, "loss": 0.9301, "step": 2720 }, { "epoch": 0.27345119447087696, "grad_norm": 2.355764865875244, "learning_rate": 4.96572412171318e-05, "loss": 0.8569, "step": 2730 }, { "epoch": 0.2744528471978765, "grad_norm": 3.5294244289398193, "learning_rate": 4.965463476754819e-05, "loss": 0.9178, "step": 2740 }, { "epoch": 0.27545449992487603, "grad_norm": 3.509269952774048, "learning_rate": 4.9652018514245514e-05, "loss": 0.861, "step": 2750 }, { "epoch": 0.2764561526518756, "grad_norm": 2.9568450450897217, "learning_rate": 4.96493924582641e-05, "loss": 0.8833, "step": 2760 }, { "epoch": 0.27745780537887516, "grad_norm": 2.9352688789367676, "learning_rate": 4.964675660064817e-05, "loss": 0.8888, "step": 2770 }, { "epoch": 0.2784594581058747, "grad_norm": 2.9790830612182617, "learning_rate": 4.9644110942445864e-05, "loss": 0.9142, "step": 2780 }, { "epoch": 0.2794611108328742, "grad_norm": 2.514970064163208, "learning_rate": 4.9641455484709196e-05, "loss": 0.9056, "step": 2790 }, { "epoch": 0.2804627635598738, "grad_norm": 3.5085692405700684, "learning_rate": 4.9638790228494106e-05, "loss": 0.857, "step": 2800 }, { "epoch": 0.28146441628687335, "grad_norm": 4.000670433044434, "learning_rate": 4.9636115174860396e-05, "loss": 0.8086, "step": 2810 }, { "epoch": 0.2824660690138729, "grad_norm": 2.9119763374328613, "learning_rate": 4.9633430324871776e-05, "loss": 0.8537, "step": 2820 }, { "epoch": 0.2834677217408724, "grad_norm": 1.9512786865234375, "learning_rate": 4.963073567959587e-05, "loss": 0.7311, "step": 2830 }, { "epoch": 0.284469374467872, "grad_norm": 2.5956673622131348, "learning_rate": 4.962803124010417e-05, "loss": 0.8748, "step": 2840 }, { "epoch": 0.28547102719487155, "grad_norm": 2.648887872695923, "learning_rate": 4.9625317007472085e-05, "loss": 0.911, "step": 2850 }, { "epoch": 0.2864726799218711, "grad_norm": 2.7080471515655518, "learning_rate": 4.96225929827789e-05, "loss": 0.8843, "step": 2860 }, { "epoch": 0.2874743326488706, "grad_norm": 2.9496068954467773, "learning_rate": 4.961985916710781e-05, "loss": 0.8772, "step": 2870 }, { "epoch": 0.28847598537587016, "grad_norm": 3.3540945053100586, "learning_rate": 4.961711556154588e-05, "loss": 0.8951, "step": 2880 }, { "epoch": 0.28947763810286975, "grad_norm": 2.877737045288086, "learning_rate": 4.961436216718409e-05, "loss": 0.9708, "step": 2890 }, { "epoch": 0.2904792908298693, "grad_norm": 2.5713319778442383, "learning_rate": 4.961159898511732e-05, "loss": 0.8817, "step": 2900 }, { "epoch": 0.2914809435568688, "grad_norm": 3.155395746231079, "learning_rate": 4.960882601644431e-05, "loss": 0.9464, "step": 2910 }, { "epoch": 0.29248259628386836, "grad_norm": 2.9532198905944824, "learning_rate": 4.960604326226771e-05, "loss": 0.8817, "step": 2920 }, { "epoch": 0.29348424901086795, "grad_norm": 3.1550869941711426, "learning_rate": 4.960325072369407e-05, "loss": 0.8659, "step": 2930 }, { "epoch": 0.2944859017378675, "grad_norm": 2.6398537158966064, "learning_rate": 4.960044840183381e-05, "loss": 0.9133, "step": 2940 }, { "epoch": 0.295487554464867, "grad_norm": 2.6897711753845215, "learning_rate": 4.959763629780126e-05, "loss": 0.8332, "step": 2950 }, { "epoch": 0.29648920719186656, "grad_norm": 3.5697755813598633, "learning_rate": 4.959481441271462e-05, "loss": 0.9082, "step": 2960 }, { "epoch": 0.29749085991886615, "grad_norm": 3.7671542167663574, "learning_rate": 4.9591982747696006e-05, "loss": 0.9394, "step": 2970 }, { "epoch": 0.2984925126458657, "grad_norm": 2.823279857635498, "learning_rate": 4.958914130387139e-05, "loss": 0.8998, "step": 2980 }, { "epoch": 0.2994941653728652, "grad_norm": 2.3585197925567627, "learning_rate": 4.958629008237066e-05, "loss": 0.8573, "step": 2990 }, { "epoch": 0.30049581809986475, "grad_norm": 2.3386313915252686, "learning_rate": 4.958342908432757e-05, "loss": 0.7891, "step": 3000 }, { "epoch": 0.30149747082686434, "grad_norm": 2.2583234310150146, "learning_rate": 4.958055831087979e-05, "loss": 0.912, "step": 3010 }, { "epoch": 0.3024991235538639, "grad_norm": 3.0650525093078613, "learning_rate": 4.957767776316885e-05, "loss": 0.9714, "step": 3020 }, { "epoch": 0.3035007762808634, "grad_norm": 2.90987491607666, "learning_rate": 4.9574787442340185e-05, "loss": 0.7965, "step": 3030 }, { "epoch": 0.30450242900786295, "grad_norm": 2.936350107192993, "learning_rate": 4.95718873495431e-05, "loss": 0.8288, "step": 3040 }, { "epoch": 0.30550408173486254, "grad_norm": 3.2421929836273193, "learning_rate": 4.956897748593079e-05, "loss": 0.8186, "step": 3050 }, { "epoch": 0.3065057344618621, "grad_norm": 2.5979058742523193, "learning_rate": 4.956605785266034e-05, "loss": 0.875, "step": 3060 }, { "epoch": 0.3075073871888616, "grad_norm": 2.861781597137451, "learning_rate": 4.956312845089274e-05, "loss": 0.8187, "step": 3070 }, { "epoch": 0.30850903991586115, "grad_norm": 2.5205078125, "learning_rate": 4.9560189281792815e-05, "loss": 0.8718, "step": 3080 }, { "epoch": 0.30951069264286074, "grad_norm": 2.3308043479919434, "learning_rate": 4.955724034652931e-05, "loss": 0.9551, "step": 3090 }, { "epoch": 0.3105123453698603, "grad_norm": 2.916992425918579, "learning_rate": 4.955428164627486e-05, "loss": 0.828, "step": 3100 }, { "epoch": 0.3115139980968598, "grad_norm": 2.490170478820801, "learning_rate": 4.955131318220595e-05, "loss": 0.8135, "step": 3110 }, { "epoch": 0.31251565082385935, "grad_norm": 3.505068302154541, "learning_rate": 4.954833495550297e-05, "loss": 0.8689, "step": 3120 }, { "epoch": 0.31351730355085894, "grad_norm": 2.6903154850006104, "learning_rate": 4.9545346967350204e-05, "loss": 0.9086, "step": 3130 }, { "epoch": 0.3145189562778585, "grad_norm": 3.43782377243042, "learning_rate": 4.954234921893579e-05, "loss": 0.8903, "step": 3140 }, { "epoch": 0.315520609004858, "grad_norm": 2.6618101596832275, "learning_rate": 4.953934171145175e-05, "loss": 0.9068, "step": 3150 }, { "epoch": 0.31652226173185755, "grad_norm": 3.6636927127838135, "learning_rate": 4.953632444609401e-05, "loss": 0.8745, "step": 3160 }, { "epoch": 0.31752391445885714, "grad_norm": 2.4644505977630615, "learning_rate": 4.953329742406235e-05, "loss": 0.9156, "step": 3170 }, { "epoch": 0.3185255671858567, "grad_norm": 2.7352774143218994, "learning_rate": 4.9530260646560455e-05, "loss": 0.8799, "step": 3180 }, { "epoch": 0.3195272199128562, "grad_norm": 2.6652235984802246, "learning_rate": 4.952721411479587e-05, "loss": 0.8353, "step": 3190 }, { "epoch": 0.32052887263985574, "grad_norm": 2.7542600631713867, "learning_rate": 4.952415782998001e-05, "loss": 0.7625, "step": 3200 }, { "epoch": 0.32153052536685534, "grad_norm": 2.849294662475586, "learning_rate": 4.9521091793328204e-05, "loss": 0.8022, "step": 3210 }, { "epoch": 0.32253217809385487, "grad_norm": 2.48455548286438, "learning_rate": 4.951801600605962e-05, "loss": 0.8215, "step": 3220 }, { "epoch": 0.3235338308208544, "grad_norm": 3.339585304260254, "learning_rate": 4.9514930469397326e-05, "loss": 0.8675, "step": 3230 }, { "epoch": 0.32453548354785394, "grad_norm": 2.703068494796753, "learning_rate": 4.951183518456827e-05, "loss": 0.7836, "step": 3240 }, { "epoch": 0.32553713627485353, "grad_norm": 2.545600414276123, "learning_rate": 4.950873015280325e-05, "loss": 0.8031, "step": 3250 }, { "epoch": 0.32653878900185307, "grad_norm": 3.298454523086548, "learning_rate": 4.9505615375336965e-05, "loss": 0.8976, "step": 3260 }, { "epoch": 0.3275404417288526, "grad_norm": 2.814692735671997, "learning_rate": 4.950249085340799e-05, "loss": 0.8475, "step": 3270 }, { "epoch": 0.32854209445585214, "grad_norm": 2.775646448135376, "learning_rate": 4.949935658825876e-05, "loss": 0.8412, "step": 3280 }, { "epoch": 0.32954374718285173, "grad_norm": 2.9351887702941895, "learning_rate": 4.9496212581135585e-05, "loss": 0.8447, "step": 3290 }, { "epoch": 0.33054539990985127, "grad_norm": 2.701782464981079, "learning_rate": 4.9493058833288666e-05, "loss": 0.9201, "step": 3300 }, { "epoch": 0.3315470526368508, "grad_norm": 3.1329259872436523, "learning_rate": 4.948989534597205e-05, "loss": 0.7931, "step": 3310 }, { "epoch": 0.33254870536385034, "grad_norm": 3.1399502754211426, "learning_rate": 4.948672212044369e-05, "loss": 0.8213, "step": 3320 }, { "epoch": 0.33355035809084993, "grad_norm": 2.487262725830078, "learning_rate": 4.948353915796538e-05, "loss": 0.82, "step": 3330 }, { "epoch": 0.33455201081784947, "grad_norm": 2.3617560863494873, "learning_rate": 4.948034645980281e-05, "loss": 0.8816, "step": 3340 }, { "epoch": 0.335553663544849, "grad_norm": 2.443876266479492, "learning_rate": 4.947714402722552e-05, "loss": 0.8592, "step": 3350 }, { "epoch": 0.33655531627184854, "grad_norm": 3.451190948486328, "learning_rate": 4.947393186150694e-05, "loss": 0.8691, "step": 3360 }, { "epoch": 0.3375569689988481, "grad_norm": 2.79589581489563, "learning_rate": 4.947070996392435e-05, "loss": 0.8485, "step": 3370 }, { "epoch": 0.33855862172584766, "grad_norm": 3.133854389190674, "learning_rate": 4.946747833575894e-05, "loss": 0.8401, "step": 3380 }, { "epoch": 0.3395602744528472, "grad_norm": 2.8713510036468506, "learning_rate": 4.946423697829572e-05, "loss": 0.7918, "step": 3390 }, { "epoch": 0.34056192717984674, "grad_norm": 3.4451394081115723, "learning_rate": 4.946098589282359e-05, "loss": 0.8507, "step": 3400 }, { "epoch": 0.34156357990684627, "grad_norm": 2.4211623668670654, "learning_rate": 4.945772508063531e-05, "loss": 0.9028, "step": 3410 }, { "epoch": 0.34256523263384586, "grad_norm": 3.622356414794922, "learning_rate": 4.945445454302754e-05, "loss": 0.7338, "step": 3420 }, { "epoch": 0.3435668853608454, "grad_norm": 2.8370401859283447, "learning_rate": 4.945117428130076e-05, "loss": 0.8819, "step": 3430 }, { "epoch": 0.34456853808784493, "grad_norm": 2.372068405151367, "learning_rate": 4.944788429675936e-05, "loss": 0.817, "step": 3440 }, { "epoch": 0.34557019081484447, "grad_norm": 2.5194711685180664, "learning_rate": 4.944458459071156e-05, "loss": 0.8744, "step": 3450 }, { "epoch": 0.34657184354184406, "grad_norm": 2.253473997116089, "learning_rate": 4.944127516446947e-05, "loss": 0.8461, "step": 3460 }, { "epoch": 0.3475734962688436, "grad_norm": 2.651766300201416, "learning_rate": 4.9437956019349054e-05, "loss": 0.8544, "step": 3470 }, { "epoch": 0.34857514899584313, "grad_norm": 2.0998165607452393, "learning_rate": 4.943462715667015e-05, "loss": 0.8272, "step": 3480 }, { "epoch": 0.34957680172284267, "grad_norm": 3.30053973197937, "learning_rate": 4.9431288577756446e-05, "loss": 0.8739, "step": 3490 }, { "epoch": 0.35057845444984226, "grad_norm": 2.7177000045776367, "learning_rate": 4.9427940283935504e-05, "loss": 0.8858, "step": 3500 }, { "epoch": 0.3515801071768418, "grad_norm": 2.158338785171509, "learning_rate": 4.9424582276538746e-05, "loss": 0.7827, "step": 3510 }, { "epoch": 0.35258175990384133, "grad_norm": 3.1199424266815186, "learning_rate": 4.9421214556901454e-05, "loss": 0.8491, "step": 3520 }, { "epoch": 0.35358341263084087, "grad_norm": 2.8127048015594482, "learning_rate": 4.941783712636278e-05, "loss": 0.8002, "step": 3530 }, { "epoch": 0.35458506535784046, "grad_norm": 2.9365146160125732, "learning_rate": 4.941444998626573e-05, "loss": 0.9608, "step": 3540 }, { "epoch": 0.35558671808484, "grad_norm": 3.4413721561431885, "learning_rate": 4.9411053137957174e-05, "loss": 0.8685, "step": 3550 }, { "epoch": 0.35658837081183953, "grad_norm": 2.9965898990631104, "learning_rate": 4.940764658278785e-05, "loss": 0.8144, "step": 3560 }, { "epoch": 0.35759002353883906, "grad_norm": 2.883406162261963, "learning_rate": 4.9404230322112324e-05, "loss": 0.8437, "step": 3570 }, { "epoch": 0.35859167626583865, "grad_norm": 3.0368754863739014, "learning_rate": 4.940080435728907e-05, "loss": 0.8707, "step": 3580 }, { "epoch": 0.3595933289928382, "grad_norm": 2.8644607067108154, "learning_rate": 4.939736868968038e-05, "loss": 0.8184, "step": 3590 }, { "epoch": 0.3605949817198377, "grad_norm": 2.6353414058685303, "learning_rate": 4.9393923320652424e-05, "loss": 0.8343, "step": 3600 }, { "epoch": 0.36159663444683726, "grad_norm": 3.469896078109741, "learning_rate": 4.939046825157523e-05, "loss": 0.845, "step": 3610 }, { "epoch": 0.36259828717383685, "grad_norm": 2.8434600830078125, "learning_rate": 4.9387003483822666e-05, "loss": 0.8833, "step": 3620 }, { "epoch": 0.3635999399008364, "grad_norm": 3.1709251403808594, "learning_rate": 4.938352901877249e-05, "loss": 0.826, "step": 3630 }, { "epoch": 0.3646015926278359, "grad_norm": 2.7404329776763916, "learning_rate": 4.938004485780628e-05, "loss": 0.7455, "step": 3640 }, { "epoch": 0.36560324535483546, "grad_norm": 4.057974338531494, "learning_rate": 4.937655100230949e-05, "loss": 0.7923, "step": 3650 }, { "epoch": 0.36660489808183505, "grad_norm": 2.943912982940674, "learning_rate": 4.937304745367143e-05, "loss": 0.8386, "step": 3660 }, { "epoch": 0.3676065508088346, "grad_norm": 3.354377269744873, "learning_rate": 4.936953421328524e-05, "loss": 0.8257, "step": 3670 }, { "epoch": 0.3686082035358341, "grad_norm": 2.800428628921509, "learning_rate": 4.936601128254794e-05, "loss": 0.7587, "step": 3680 }, { "epoch": 0.36960985626283366, "grad_norm": 3.1578376293182373, "learning_rate": 4.936247866286041e-05, "loss": 0.7953, "step": 3690 }, { "epoch": 0.37061150898983325, "grad_norm": 2.962111711502075, "learning_rate": 4.935893635562735e-05, "loss": 0.826, "step": 3700 }, { "epoch": 0.3716131617168328, "grad_norm": 2.9009153842926025, "learning_rate": 4.935538436225733e-05, "loss": 0.7948, "step": 3710 }, { "epoch": 0.3726148144438323, "grad_norm": 2.9375569820404053, "learning_rate": 4.9351822684162787e-05, "loss": 0.8348, "step": 3720 }, { "epoch": 0.37361646717083186, "grad_norm": 3.2307522296905518, "learning_rate": 4.934825132275999e-05, "loss": 0.8755, "step": 3730 }, { "epoch": 0.37461811989783145, "grad_norm": 3.187584161758423, "learning_rate": 4.934467027946905e-05, "loss": 0.8315, "step": 3740 }, { "epoch": 0.375619772624831, "grad_norm": 2.4784653186798096, "learning_rate": 4.9341079555713946e-05, "loss": 0.8161, "step": 3750 }, { "epoch": 0.3766214253518305, "grad_norm": 3.0355725288391113, "learning_rate": 4.9337479152922504e-05, "loss": 0.7234, "step": 3760 }, { "epoch": 0.37762307807883005, "grad_norm": 5.005615711212158, "learning_rate": 4.93338690725264e-05, "loss": 0.872, "step": 3770 }, { "epoch": 0.37862473080582965, "grad_norm": 2.9800662994384766, "learning_rate": 4.933024931596114e-05, "loss": 0.8399, "step": 3780 }, { "epoch": 0.3796263835328292, "grad_norm": 3.00419282913208, "learning_rate": 4.93266198846661e-05, "loss": 0.7835, "step": 3790 }, { "epoch": 0.3806280362598287, "grad_norm": 2.776589870452881, "learning_rate": 4.93229807800845e-05, "loss": 0.8586, "step": 3800 }, { "epoch": 0.38162968898682825, "grad_norm": 2.228154420852661, "learning_rate": 4.9319332003663385e-05, "loss": 0.7737, "step": 3810 }, { "epoch": 0.3826313417138278, "grad_norm": 2.2705202102661133, "learning_rate": 4.931567355685368e-05, "loss": 0.8327, "step": 3820 }, { "epoch": 0.3836329944408274, "grad_norm": 3.1992833614349365, "learning_rate": 4.9312005441110126e-05, "loss": 0.7732, "step": 3830 }, { "epoch": 0.3846346471678269, "grad_norm": 2.792924642562866, "learning_rate": 4.930832765789132e-05, "loss": 0.7673, "step": 3840 }, { "epoch": 0.38563629989482645, "grad_norm": 2.705148935317993, "learning_rate": 4.9304640208659714e-05, "loss": 0.7368, "step": 3850 }, { "epoch": 0.386637952621826, "grad_norm": 2.6847712993621826, "learning_rate": 4.930094309488158e-05, "loss": 0.8845, "step": 3860 }, { "epoch": 0.3876396053488256, "grad_norm": 2.3294968605041504, "learning_rate": 4.929723631802705e-05, "loss": 0.8204, "step": 3870 }, { "epoch": 0.3886412580758251, "grad_norm": 3.4525415897369385, "learning_rate": 4.9293519879570095e-05, "loss": 0.7846, "step": 3880 }, { "epoch": 0.38964291080282465, "grad_norm": 3.896613597869873, "learning_rate": 4.9289793780988526e-05, "loss": 0.7868, "step": 3890 }, { "epoch": 0.3906445635298242, "grad_norm": 2.876314878463745, "learning_rate": 4.928605802376399e-05, "loss": 0.7856, "step": 3900 }, { "epoch": 0.3916462162568238, "grad_norm": 3.1712610721588135, "learning_rate": 4.9282312609382004e-05, "loss": 0.748, "step": 3910 }, { "epoch": 0.3926478689838233, "grad_norm": 3.0286600589752197, "learning_rate": 4.927855753933188e-05, "loss": 0.8701, "step": 3920 }, { "epoch": 0.39364952171082285, "grad_norm": 2.6831576824188232, "learning_rate": 4.9274792815106794e-05, "loss": 0.8515, "step": 3930 }, { "epoch": 0.3946511744378224, "grad_norm": 2.632242202758789, "learning_rate": 4.9271018438203766e-05, "loss": 0.8772, "step": 3940 }, { "epoch": 0.395652827164822, "grad_norm": 3.1001410484313965, "learning_rate": 4.9267234410123644e-05, "loss": 0.7569, "step": 3950 }, { "epoch": 0.3966544798918215, "grad_norm": 3.0256540775299072, "learning_rate": 4.9263440732371116e-05, "loss": 0.8183, "step": 3960 }, { "epoch": 0.39765613261882105, "grad_norm": 3.7380168437957764, "learning_rate": 4.925963740645471e-05, "loss": 0.8241, "step": 3970 }, { "epoch": 0.3986577853458206, "grad_norm": 3.397099256515503, "learning_rate": 4.925582443388679e-05, "loss": 0.8335, "step": 3980 }, { "epoch": 0.3996594380728202, "grad_norm": 2.583353281021118, "learning_rate": 4.925200181618354e-05, "loss": 0.7761, "step": 3990 }, { "epoch": 0.4006610907998197, "grad_norm": 2.2222886085510254, "learning_rate": 4.924816955486501e-05, "loss": 0.7627, "step": 4000 }, { "epoch": 0.40166274352681924, "grad_norm": 2.9799551963806152, "learning_rate": 4.9244327651455065e-05, "loss": 0.8719, "step": 4010 }, { "epoch": 0.4026643962538188, "grad_norm": 2.8081564903259277, "learning_rate": 4.9240476107481405e-05, "loss": 0.858, "step": 4020 }, { "epoch": 0.40366604898081837, "grad_norm": 3.4415876865386963, "learning_rate": 4.923661492447556e-05, "loss": 0.828, "step": 4030 }, { "epoch": 0.4046677017078179, "grad_norm": 3.00347900390625, "learning_rate": 4.92327441039729e-05, "loss": 0.7376, "step": 4040 }, { "epoch": 0.40566935443481744, "grad_norm": 2.860119581222534, "learning_rate": 4.922886364751263e-05, "loss": 0.8286, "step": 4050 }, { "epoch": 0.406671007161817, "grad_norm": 2.8311233520507812, "learning_rate": 4.92249735566378e-05, "loss": 0.8347, "step": 4060 }, { "epoch": 0.40767265988881657, "grad_norm": 2.2550930976867676, "learning_rate": 4.922107383289524e-05, "loss": 0.8662, "step": 4070 }, { "epoch": 0.4086743126158161, "grad_norm": 2.128415584564209, "learning_rate": 4.921716447783566e-05, "loss": 0.7438, "step": 4080 }, { "epoch": 0.40967596534281564, "grad_norm": 2.728203773498535, "learning_rate": 4.921324549301359e-05, "loss": 0.8708, "step": 4090 }, { "epoch": 0.4106776180698152, "grad_norm": 2.9716641902923584, "learning_rate": 4.9209316879987374e-05, "loss": 0.8483, "step": 4100 }, { "epoch": 0.41167927079681477, "grad_norm": 2.1702470779418945, "learning_rate": 4.92053786403192e-05, "loss": 0.7749, "step": 4110 }, { "epoch": 0.4126809235238143, "grad_norm": 2.6090810298919678, "learning_rate": 4.9201430775575074e-05, "loss": 0.7221, "step": 4120 }, { "epoch": 0.41368257625081384, "grad_norm": 3.643763780593872, "learning_rate": 4.9197473287324835e-05, "loss": 0.8035, "step": 4130 }, { "epoch": 0.4146842289778134, "grad_norm": 2.5547125339508057, "learning_rate": 4.919350617714215e-05, "loss": 0.779, "step": 4140 }, { "epoch": 0.41568588170481297, "grad_norm": 2.8796565532684326, "learning_rate": 4.9189529446604484e-05, "loss": 0.8606, "step": 4150 }, { "epoch": 0.4166875344318125, "grad_norm": 2.778782606124878, "learning_rate": 4.918554309729318e-05, "loss": 0.8208, "step": 4160 }, { "epoch": 0.41768918715881204, "grad_norm": 3.649463176727295, "learning_rate": 4.9181547130793385e-05, "loss": 0.8072, "step": 4170 }, { "epoch": 0.41869083988581157, "grad_norm": 2.776176929473877, "learning_rate": 4.917754154869403e-05, "loss": 0.7387, "step": 4180 }, { "epoch": 0.41969249261281116, "grad_norm": 3.0687742233276367, "learning_rate": 4.917352635258794e-05, "loss": 0.8521, "step": 4190 }, { "epoch": 0.4206941453398107, "grad_norm": 2.9265615940093994, "learning_rate": 4.916950154407169e-05, "loss": 0.8117, "step": 4200 }, { "epoch": 0.42169579806681023, "grad_norm": 3.030052900314331, "learning_rate": 4.916546712474573e-05, "loss": 0.8334, "step": 4210 }, { "epoch": 0.42269745079380977, "grad_norm": 3.2427847385406494, "learning_rate": 4.916142309621432e-05, "loss": 0.8247, "step": 4220 }, { "epoch": 0.42369910352080936, "grad_norm": 2.7868199348449707, "learning_rate": 4.9157369460085535e-05, "loss": 0.8202, "step": 4230 }, { "epoch": 0.4247007562478089, "grad_norm": 2.5167086124420166, "learning_rate": 4.915330621797126e-05, "loss": 0.8389, "step": 4240 }, { "epoch": 0.42570240897480843, "grad_norm": 2.5550220012664795, "learning_rate": 4.914923337148722e-05, "loss": 0.8128, "step": 4250 }, { "epoch": 0.42670406170180797, "grad_norm": 2.4748544692993164, "learning_rate": 4.9145150922252944e-05, "loss": 0.826, "step": 4260 }, { "epoch": 0.42770571442880756, "grad_norm": 3.1754608154296875, "learning_rate": 4.9141058871891793e-05, "loss": 0.7927, "step": 4270 }, { "epoch": 0.4287073671558071, "grad_norm": 3.4101622104644775, "learning_rate": 4.9136957222030934e-05, "loss": 0.8662, "step": 4280 }, { "epoch": 0.42970901988280663, "grad_norm": 2.1540229320526123, "learning_rate": 4.9132845974301357e-05, "loss": 0.7403, "step": 4290 }, { "epoch": 0.43071067260980617, "grad_norm": 3.1899423599243164, "learning_rate": 4.912872513033786e-05, "loss": 0.7887, "step": 4300 }, { "epoch": 0.4317123253368057, "grad_norm": 2.9181227684020996, "learning_rate": 4.912459469177907e-05, "loss": 0.7683, "step": 4310 }, { "epoch": 0.4327139780638053, "grad_norm": 2.837829351425171, "learning_rate": 4.9120454660267426e-05, "loss": 0.7659, "step": 4320 }, { "epoch": 0.43371563079080483, "grad_norm": 2.8570311069488525, "learning_rate": 4.911630503744916e-05, "loss": 0.8271, "step": 4330 }, { "epoch": 0.43471728351780436, "grad_norm": 2.584602117538452, "learning_rate": 4.911214582497436e-05, "loss": 0.8455, "step": 4340 }, { "epoch": 0.4357189362448039, "grad_norm": 3.5033724308013916, "learning_rate": 4.91079770244969e-05, "loss": 0.8245, "step": 4350 }, { "epoch": 0.4367205889718035, "grad_norm": 2.954213857650757, "learning_rate": 4.910379863767446e-05, "loss": 0.832, "step": 4360 }, { "epoch": 0.437722241698803, "grad_norm": 3.0140154361724854, "learning_rate": 4.909961066616855e-05, "loss": 0.7357, "step": 4370 }, { "epoch": 0.43872389442580256, "grad_norm": 2.633049726486206, "learning_rate": 4.909541311164448e-05, "loss": 0.7821, "step": 4380 }, { "epoch": 0.4397255471528021, "grad_norm": 3.462796688079834, "learning_rate": 4.909120597577137e-05, "loss": 0.759, "step": 4390 }, { "epoch": 0.4407271998798017, "grad_norm": 2.973219633102417, "learning_rate": 4.9086989260222165e-05, "loss": 0.7813, "step": 4400 }, { "epoch": 0.4417288526068012, "grad_norm": 2.684271812438965, "learning_rate": 4.90827629666736e-05, "loss": 0.7966, "step": 4410 }, { "epoch": 0.44273050533380076, "grad_norm": 2.947772264480591, "learning_rate": 4.9078527096806225e-05, "loss": 0.7481, "step": 4420 }, { "epoch": 0.4437321580608003, "grad_norm": 3.4349887371063232, "learning_rate": 4.907428165230441e-05, "loss": 0.7553, "step": 4430 }, { "epoch": 0.4447338107877999, "grad_norm": 2.8952155113220215, "learning_rate": 4.907002663485632e-05, "loss": 0.7569, "step": 4440 }, { "epoch": 0.4457354635147994, "grad_norm": 2.7803821563720703, "learning_rate": 4.9065762046153914e-05, "loss": 0.8073, "step": 4450 }, { "epoch": 0.44673711624179896, "grad_norm": 3.2252511978149414, "learning_rate": 4.9061487887892985e-05, "loss": 0.7818, "step": 4460 }, { "epoch": 0.4477387689687985, "grad_norm": 3.153841018676758, "learning_rate": 4.905720416177312e-05, "loss": 0.839, "step": 4470 }, { "epoch": 0.4487404216957981, "grad_norm": 2.7690632343292236, "learning_rate": 4.9052910869497704e-05, "loss": 0.7685, "step": 4480 }, { "epoch": 0.4497420744227976, "grad_norm": 2.3396732807159424, "learning_rate": 4.904860801277392e-05, "loss": 0.6467, "step": 4490 }, { "epoch": 0.45074372714979716, "grad_norm": 2.70893931388855, "learning_rate": 4.904429559331279e-05, "loss": 0.7481, "step": 4500 }, { "epoch": 0.4517453798767967, "grad_norm": 3.1398801803588867, "learning_rate": 4.9039973612829094e-05, "loss": 0.7929, "step": 4510 }, { "epoch": 0.4527470326037963, "grad_norm": 2.2686514854431152, "learning_rate": 4.903564207304143e-05, "loss": 0.8276, "step": 4520 }, { "epoch": 0.4537486853307958, "grad_norm": 2.6630866527557373, "learning_rate": 4.903130097567222e-05, "loss": 0.7651, "step": 4530 }, { "epoch": 0.45475033805779536, "grad_norm": 2.9019227027893066, "learning_rate": 4.902695032244765e-05, "loss": 0.8739, "step": 4540 }, { "epoch": 0.4557519907847949, "grad_norm": 2.878720760345459, "learning_rate": 4.9022590115097723e-05, "loss": 0.8215, "step": 4550 }, { "epoch": 0.4567536435117945, "grad_norm": 2.4830129146575928, "learning_rate": 4.9018220355356246e-05, "loss": 0.7917, "step": 4560 }, { "epoch": 0.457755296238794, "grad_norm": 2.447023630142212, "learning_rate": 4.901384104496083e-05, "loss": 0.7182, "step": 4570 }, { "epoch": 0.45875694896579355, "grad_norm": 2.4233357906341553, "learning_rate": 4.900945218565285e-05, "loss": 0.8353, "step": 4580 }, { "epoch": 0.4597586016927931, "grad_norm": 2.6926865577697754, "learning_rate": 4.900505377917751e-05, "loss": 0.7566, "step": 4590 }, { "epoch": 0.4607602544197927, "grad_norm": 3.171412229537964, "learning_rate": 4.900064582728381e-05, "loss": 0.8012, "step": 4600 }, { "epoch": 0.4617619071467922, "grad_norm": 2.605421781539917, "learning_rate": 4.899622833172452e-05, "loss": 0.7585, "step": 4610 }, { "epoch": 0.46276355987379175, "grad_norm": 2.7635443210601807, "learning_rate": 4.899180129425625e-05, "loss": 0.7919, "step": 4620 }, { "epoch": 0.4637652126007913, "grad_norm": 3.7377936840057373, "learning_rate": 4.8987364716639346e-05, "loss": 0.7324, "step": 4630 }, { "epoch": 0.4647668653277909, "grad_norm": 2.2805068492889404, "learning_rate": 4.898291860063799e-05, "loss": 0.7262, "step": 4640 }, { "epoch": 0.4657685180547904, "grad_norm": 2.1092188358306885, "learning_rate": 4.897846294802014e-05, "loss": 0.7749, "step": 4650 }, { "epoch": 0.46677017078178995, "grad_norm": 2.417418956756592, "learning_rate": 4.8973997760557566e-05, "loss": 0.8886, "step": 4660 }, { "epoch": 0.4677718235087895, "grad_norm": 2.4715187549591064, "learning_rate": 4.89695230400258e-05, "loss": 0.7872, "step": 4670 }, { "epoch": 0.4687734762357891, "grad_norm": 3.4084720611572266, "learning_rate": 4.8965038788204185e-05, "loss": 0.7903, "step": 4680 }, { "epoch": 0.4697751289627886, "grad_norm": 2.9078190326690674, "learning_rate": 4.8960545006875844e-05, "loss": 0.7957, "step": 4690 }, { "epoch": 0.47077678168978815, "grad_norm": 3.02535080909729, "learning_rate": 4.895604169782769e-05, "loss": 0.7782, "step": 4700 }, { "epoch": 0.4717784344167877, "grad_norm": 2.9437060356140137, "learning_rate": 4.8951528862850444e-05, "loss": 0.8061, "step": 4710 }, { "epoch": 0.4727800871437873, "grad_norm": 2.7083654403686523, "learning_rate": 4.894700650373858e-05, "loss": 0.6672, "step": 4720 }, { "epoch": 0.4737817398707868, "grad_norm": 2.350159168243408, "learning_rate": 4.894247462229038e-05, "loss": 0.7149, "step": 4730 }, { "epoch": 0.47478339259778635, "grad_norm": 2.328615665435791, "learning_rate": 4.893793322030793e-05, "loss": 0.734, "step": 4740 }, { "epoch": 0.4757850453247859, "grad_norm": 2.403383255004883, "learning_rate": 4.8933382299597063e-05, "loss": 0.7973, "step": 4750 }, { "epoch": 0.4767866980517855, "grad_norm": 3.2092747688293457, "learning_rate": 4.892882186196742e-05, "loss": 0.7937, "step": 4760 }, { "epoch": 0.477788350778785, "grad_norm": 2.68188214302063, "learning_rate": 4.892425190923242e-05, "loss": 0.7354, "step": 4770 }, { "epoch": 0.47879000350578454, "grad_norm": 2.7987594604492188, "learning_rate": 4.891967244320929e-05, "loss": 0.8261, "step": 4780 }, { "epoch": 0.4797916562327841, "grad_norm": 2.9749321937561035, "learning_rate": 4.8915083465718984e-05, "loss": 0.7946, "step": 4790 }, { "epoch": 0.4807933089597836, "grad_norm": 2.480320692062378, "learning_rate": 4.891048497858629e-05, "loss": 0.7868, "step": 4800 }, { "epoch": 0.4817949616867832, "grad_norm": 3.2900009155273438, "learning_rate": 4.8905876983639775e-05, "loss": 0.7455, "step": 4810 }, { "epoch": 0.48279661441378274, "grad_norm": 2.6279869079589844, "learning_rate": 4.8901259482711744e-05, "loss": 0.7786, "step": 4820 }, { "epoch": 0.4837982671407823, "grad_norm": 2.4610986709594727, "learning_rate": 4.8896632477638324e-05, "loss": 0.7257, "step": 4830 }, { "epoch": 0.4847999198677818, "grad_norm": 2.5319132804870605, "learning_rate": 4.88919959702594e-05, "loss": 0.8303, "step": 4840 }, { "epoch": 0.4858015725947814, "grad_norm": 2.8233323097229004, "learning_rate": 4.888734996241865e-05, "loss": 0.7286, "step": 4850 }, { "epoch": 0.48680322532178094, "grad_norm": 2.4413650035858154, "learning_rate": 4.8882694455963516e-05, "loss": 0.7694, "step": 4860 }, { "epoch": 0.4878048780487805, "grad_norm": 3.778494358062744, "learning_rate": 4.887802945274523e-05, "loss": 0.77, "step": 4870 }, { "epoch": 0.48880653077578, "grad_norm": 3.3177435398101807, "learning_rate": 4.887335495461879e-05, "loss": 0.8672, "step": 4880 }, { "epoch": 0.4898081835027796, "grad_norm": 2.7683284282684326, "learning_rate": 4.886867096344296e-05, "loss": 0.7497, "step": 4890 }, { "epoch": 0.49080983622977914, "grad_norm": 3.1127943992614746, "learning_rate": 4.886397748108031e-05, "loss": 0.7698, "step": 4900 }, { "epoch": 0.4918114889567787, "grad_norm": 2.4643614292144775, "learning_rate": 4.885927450939716e-05, "loss": 0.8021, "step": 4910 }, { "epoch": 0.4928131416837782, "grad_norm": 3.0037858486175537, "learning_rate": 4.8854562050263614e-05, "loss": 0.8111, "step": 4920 }, { "epoch": 0.4938147944107778, "grad_norm": 2.6661083698272705, "learning_rate": 4.8849840105553536e-05, "loss": 0.7426, "step": 4930 }, { "epoch": 0.49481644713777734, "grad_norm": 2.461570978164673, "learning_rate": 4.8845108677144565e-05, "loss": 0.8039, "step": 4940 }, { "epoch": 0.4958180998647769, "grad_norm": 2.258551836013794, "learning_rate": 4.884036776691813e-05, "loss": 0.6808, "step": 4950 }, { "epoch": 0.4968197525917764, "grad_norm": 2.733013153076172, "learning_rate": 4.8835617376759405e-05, "loss": 0.8166, "step": 4960 }, { "epoch": 0.497821405318776, "grad_norm": 2.9279303550720215, "learning_rate": 4.8830857508557344e-05, "loss": 0.7596, "step": 4970 }, { "epoch": 0.49882305804577554, "grad_norm": 2.5595335960388184, "learning_rate": 4.882608816420467e-05, "loss": 0.7144, "step": 4980 }, { "epoch": 0.49982471077277507, "grad_norm": 1.9373695850372314, "learning_rate": 4.88213093455979e-05, "loss": 0.7014, "step": 4990 }, { "epoch": 0.5008263634997746, "grad_norm": 3.3078603744506836, "learning_rate": 4.8816521054637264e-05, "loss": 0.7673, "step": 5000 }, { "epoch": 0.5018280162267742, "grad_norm": 3.2751052379608154, "learning_rate": 4.8811723293226796e-05, "loss": 0.7277, "step": 5010 }, { "epoch": 0.5028296689537737, "grad_norm": 2.650498628616333, "learning_rate": 4.880691606327429e-05, "loss": 0.8175, "step": 5020 }, { "epoch": 0.5038313216807733, "grad_norm": 3.0442254543304443, "learning_rate": 4.88020993666913e-05, "loss": 0.7676, "step": 5030 }, { "epoch": 0.5048329744077729, "grad_norm": 2.6046295166015625, "learning_rate": 4.8797273205393144e-05, "loss": 0.6961, "step": 5040 }, { "epoch": 0.5058346271347723, "grad_norm": 2.5837275981903076, "learning_rate": 4.8792437581298923e-05, "loss": 0.7531, "step": 5050 }, { "epoch": 0.5068362798617719, "grad_norm": 2.275136947631836, "learning_rate": 4.8787592496331456e-05, "loss": 0.7709, "step": 5060 }, { "epoch": 0.5078379325887715, "grad_norm": 3.156888961791992, "learning_rate": 4.8782737952417376e-05, "loss": 0.818, "step": 5070 }, { "epoch": 0.508839585315771, "grad_norm": 3.180203914642334, "learning_rate": 4.877787395148705e-05, "loss": 0.8009, "step": 5080 }, { "epoch": 0.5098412380427706, "grad_norm": 2.5815541744232178, "learning_rate": 4.877300049547461e-05, "loss": 0.766, "step": 5090 }, { "epoch": 0.5108428907697701, "grad_norm": 3.7498693466186523, "learning_rate": 4.876811758631793e-05, "loss": 0.6966, "step": 5100 }, { "epoch": 0.5118445434967697, "grad_norm": 2.6118204593658447, "learning_rate": 4.8763225225958686e-05, "loss": 0.7633, "step": 5110 }, { "epoch": 0.5128461962237693, "grad_norm": 2.1438217163085938, "learning_rate": 4.875832341634227e-05, "loss": 0.7183, "step": 5120 }, { "epoch": 0.5138478489507687, "grad_norm": 3.222095012664795, "learning_rate": 4.875341215941784e-05, "loss": 0.7274, "step": 5130 }, { "epoch": 0.5148495016777683, "grad_norm": 2.6931872367858887, "learning_rate": 4.874849145713833e-05, "loss": 0.7493, "step": 5140 }, { "epoch": 0.5158511544047679, "grad_norm": 2.759004831314087, "learning_rate": 4.8743561311460424e-05, "loss": 0.7586, "step": 5150 }, { "epoch": 0.5168528071317674, "grad_norm": 2.3311071395874023, "learning_rate": 4.873862172434455e-05, "loss": 0.7245, "step": 5160 }, { "epoch": 0.517854459858767, "grad_norm": 2.3808846473693848, "learning_rate": 4.8733672697754884e-05, "loss": 0.7359, "step": 5170 }, { "epoch": 0.5188561125857665, "grad_norm": 2.6579818725585938, "learning_rate": 4.8728714233659376e-05, "loss": 0.7077, "step": 5180 }, { "epoch": 0.5198577653127661, "grad_norm": 2.8846435546875, "learning_rate": 4.872374633402972e-05, "loss": 0.7361, "step": 5190 }, { "epoch": 0.5208594180397657, "grad_norm": 3.370232105255127, "learning_rate": 4.871876900084137e-05, "loss": 0.7259, "step": 5200 }, { "epoch": 0.5218610707667651, "grad_norm": 2.5193967819213867, "learning_rate": 4.871378223607351e-05, "loss": 0.7675, "step": 5210 }, { "epoch": 0.5228627234937647, "grad_norm": 2.9328339099884033, "learning_rate": 4.870878604170909e-05, "loss": 0.8851, "step": 5220 }, { "epoch": 0.5238643762207643, "grad_norm": 3.101991891860962, "learning_rate": 4.870378041973481e-05, "loss": 0.776, "step": 5230 }, { "epoch": 0.5248660289477638, "grad_norm": 3.0238535404205322, "learning_rate": 4.8698765372141106e-05, "loss": 0.7957, "step": 5240 }, { "epoch": 0.5258676816747634, "grad_norm": 2.859778642654419, "learning_rate": 4.8693740900922193e-05, "loss": 0.7584, "step": 5250 }, { "epoch": 0.5268693344017629, "grad_norm": 2.8081979751586914, "learning_rate": 4.868870700807599e-05, "loss": 0.8051, "step": 5260 }, { "epoch": 0.5278709871287625, "grad_norm": 3.409069061279297, "learning_rate": 4.86836636956042e-05, "loss": 0.7246, "step": 5270 }, { "epoch": 0.528872639855762, "grad_norm": 2.7888481616973877, "learning_rate": 4.867861096551224e-05, "loss": 0.7656, "step": 5280 }, { "epoch": 0.5298742925827615, "grad_norm": 2.872657299041748, "learning_rate": 4.867354881980931e-05, "loss": 0.8549, "step": 5290 }, { "epoch": 0.5308759453097611, "grad_norm": 3.0871007442474365, "learning_rate": 4.8668477260508304e-05, "loss": 0.757, "step": 5300 }, { "epoch": 0.5318775980367606, "grad_norm": 2.824735641479492, "learning_rate": 4.866339628962591e-05, "loss": 0.8681, "step": 5310 }, { "epoch": 0.5328792507637602, "grad_norm": 3.060173273086548, "learning_rate": 4.8658305909182535e-05, "loss": 0.7545, "step": 5320 }, { "epoch": 0.5338809034907598, "grad_norm": 2.845705509185791, "learning_rate": 4.865320612120231e-05, "loss": 0.766, "step": 5330 }, { "epoch": 0.5348825562177593, "grad_norm": 2.0810883045196533, "learning_rate": 4.8648096927713135e-05, "loss": 0.7974, "step": 5340 }, { "epoch": 0.5358842089447589, "grad_norm": 2.593435764312744, "learning_rate": 4.864297833074665e-05, "loss": 0.758, "step": 5350 }, { "epoch": 0.5368858616717584, "grad_norm": 2.5074310302734375, "learning_rate": 4.863785033233821e-05, "loss": 0.7201, "step": 5360 }, { "epoch": 0.5378875143987579, "grad_norm": 2.603663444519043, "learning_rate": 4.863271293452693e-05, "loss": 0.6588, "step": 5370 }, { "epoch": 0.5388891671257575, "grad_norm": 2.7700934410095215, "learning_rate": 4.862756613935565e-05, "loss": 0.6865, "step": 5380 }, { "epoch": 0.539890819852757, "grad_norm": 2.748194932937622, "learning_rate": 4.862240994887096e-05, "loss": 0.7789, "step": 5390 }, { "epoch": 0.5408924725797566, "grad_norm": 2.4352834224700928, "learning_rate": 4.8617244365123174e-05, "loss": 0.7766, "step": 5400 }, { "epoch": 0.5418941253067562, "grad_norm": 2.7042815685272217, "learning_rate": 4.8612069390166344e-05, "loss": 0.7305, "step": 5410 }, { "epoch": 0.5428957780337557, "grad_norm": 2.7817211151123047, "learning_rate": 4.860688502605826e-05, "loss": 0.8304, "step": 5420 }, { "epoch": 0.5438974307607553, "grad_norm": 2.6968953609466553, "learning_rate": 4.860169127486043e-05, "loss": 0.6718, "step": 5430 }, { "epoch": 0.5448990834877548, "grad_norm": 2.5667612552642822, "learning_rate": 4.859648813863813e-05, "loss": 0.7263, "step": 5440 }, { "epoch": 0.5459007362147543, "grad_norm": 2.4719300270080566, "learning_rate": 4.859127561946033e-05, "loss": 0.7459, "step": 5450 }, { "epoch": 0.5469023889417539, "grad_norm": 2.9818973541259766, "learning_rate": 4.858605371939976e-05, "loss": 0.7407, "step": 5460 }, { "epoch": 0.5479040416687534, "grad_norm": 2.792579412460327, "learning_rate": 4.8580822440532845e-05, "loss": 0.6898, "step": 5470 }, { "epoch": 0.548905694395753, "grad_norm": 2.4535489082336426, "learning_rate": 4.857558178493979e-05, "loss": 0.7494, "step": 5480 }, { "epoch": 0.5499073471227526, "grad_norm": 2.920201539993286, "learning_rate": 4.857033175470448e-05, "loss": 0.7872, "step": 5490 }, { "epoch": 0.5509089998497521, "grad_norm": 2.750412702560425, "learning_rate": 4.856507235191454e-05, "loss": 0.784, "step": 5500 }, { "epoch": 0.5519106525767516, "grad_norm": 3.8967442512512207, "learning_rate": 4.8559803578661356e-05, "loss": 0.7216, "step": 5510 }, { "epoch": 0.5529123053037512, "grad_norm": 2.851120948791504, "learning_rate": 4.855452543704e-05, "loss": 0.7928, "step": 5520 }, { "epoch": 0.5539139580307507, "grad_norm": 2.98593807220459, "learning_rate": 4.8549237929149275e-05, "loss": 0.8081, "step": 5530 }, { "epoch": 0.5549156107577503, "grad_norm": 3.0606110095977783, "learning_rate": 4.854394105709173e-05, "loss": 0.7919, "step": 5540 }, { "epoch": 0.5559172634847498, "grad_norm": 2.4815075397491455, "learning_rate": 4.8538634822973616e-05, "loss": 0.8171, "step": 5550 }, { "epoch": 0.5569189162117494, "grad_norm": 3.970489740371704, "learning_rate": 4.853331922890492e-05, "loss": 0.7901, "step": 5560 }, { "epoch": 0.557920568938749, "grad_norm": 3.3668770790100098, "learning_rate": 4.852799427699934e-05, "loss": 0.7935, "step": 5570 }, { "epoch": 0.5589222216657485, "grad_norm": 2.571666955947876, "learning_rate": 4.8522659969374303e-05, "loss": 0.7554, "step": 5580 }, { "epoch": 0.559923874392748, "grad_norm": 2.3285953998565674, "learning_rate": 4.851731630815095e-05, "loss": 0.7063, "step": 5590 }, { "epoch": 0.5609255271197476, "grad_norm": 2.6997599601745605, "learning_rate": 4.8511963295454156e-05, "loss": 0.6977, "step": 5600 }, { "epoch": 0.5619271798467471, "grad_norm": 3.021113395690918, "learning_rate": 4.8506600933412494e-05, "loss": 0.8443, "step": 5610 }, { "epoch": 0.5629288325737467, "grad_norm": 2.5922207832336426, "learning_rate": 4.850122922415827e-05, "loss": 0.7096, "step": 5620 }, { "epoch": 0.5639304853007462, "grad_norm": 2.5463130474090576, "learning_rate": 4.84958481698275e-05, "loss": 0.7234, "step": 5630 }, { "epoch": 0.5649321380277458, "grad_norm": 3.00300931930542, "learning_rate": 4.8490457772559915e-05, "loss": 0.7499, "step": 5640 }, { "epoch": 0.5659337907547454, "grad_norm": 1.9227794408798218, "learning_rate": 4.848505803449897e-05, "loss": 0.7959, "step": 5650 }, { "epoch": 0.5669354434817448, "grad_norm": 2.412663698196411, "learning_rate": 4.847964895779181e-05, "loss": 0.6528, "step": 5660 }, { "epoch": 0.5679370962087444, "grad_norm": 2.4437978267669678, "learning_rate": 4.847423054458933e-05, "loss": 0.7367, "step": 5670 }, { "epoch": 0.568938748935744, "grad_norm": 2.260493040084839, "learning_rate": 4.846880279704612e-05, "loss": 0.7545, "step": 5680 }, { "epoch": 0.5699404016627435, "grad_norm": 2.8233468532562256, "learning_rate": 4.846336571732046e-05, "loss": 0.7347, "step": 5690 }, { "epoch": 0.5709420543897431, "grad_norm": 2.773327350616455, "learning_rate": 4.845791930757438e-05, "loss": 0.7908, "step": 5700 }, { "epoch": 0.5719437071167426, "grad_norm": 3.0723698139190674, "learning_rate": 4.84524635699736e-05, "loss": 0.7355, "step": 5710 }, { "epoch": 0.5729453598437422, "grad_norm": 3.2027089595794678, "learning_rate": 4.844699850668756e-05, "loss": 0.792, "step": 5720 }, { "epoch": 0.5739470125707418, "grad_norm": 2.687241554260254, "learning_rate": 4.844152411988937e-05, "loss": 0.6883, "step": 5730 }, { "epoch": 0.5749486652977412, "grad_norm": 3.0531158447265625, "learning_rate": 4.8436040411755887e-05, "loss": 0.7003, "step": 5740 }, { "epoch": 0.5759503180247408, "grad_norm": 3.0482845306396484, "learning_rate": 4.8430547384467684e-05, "loss": 0.701, "step": 5750 }, { "epoch": 0.5769519707517403, "grad_norm": 2.8414599895477295, "learning_rate": 4.8425045040209e-05, "loss": 0.7466, "step": 5760 }, { "epoch": 0.5779536234787399, "grad_norm": 2.379384994506836, "learning_rate": 4.84195333811678e-05, "loss": 0.7467, "step": 5770 }, { "epoch": 0.5789552762057395, "grad_norm": 2.5822062492370605, "learning_rate": 4.8414012409535755e-05, "loss": 0.7882, "step": 5780 }, { "epoch": 0.579956928932739, "grad_norm": 2.840921640396118, "learning_rate": 4.840848212750824e-05, "loss": 0.7207, "step": 5790 }, { "epoch": 0.5809585816597386, "grad_norm": 2.9414560794830322, "learning_rate": 4.840294253728431e-05, "loss": 0.754, "step": 5800 }, { "epoch": 0.5819602343867382, "grad_norm": 2.658170223236084, "learning_rate": 4.8397393641066755e-05, "loss": 0.7069, "step": 5810 }, { "epoch": 0.5829618871137376, "grad_norm": 2.7854273319244385, "learning_rate": 4.839183544106205e-05, "loss": 0.7554, "step": 5820 }, { "epoch": 0.5839635398407372, "grad_norm": 2.4555301666259766, "learning_rate": 4.838626793948037e-05, "loss": 0.7387, "step": 5830 }, { "epoch": 0.5849651925677367, "grad_norm": 2.213578939437866, "learning_rate": 4.838069113853557e-05, "loss": 0.6798, "step": 5840 }, { "epoch": 0.5859668452947363, "grad_norm": 3.014767646789551, "learning_rate": 4.837510504044523e-05, "loss": 0.6855, "step": 5850 }, { "epoch": 0.5869684980217359, "grad_norm": 2.637495994567871, "learning_rate": 4.836950964743063e-05, "loss": 0.7603, "step": 5860 }, { "epoch": 0.5879701507487354, "grad_norm": 2.4447574615478516, "learning_rate": 4.8363904961716726e-05, "loss": 0.7416, "step": 5870 }, { "epoch": 0.588971803475735, "grad_norm": 2.8144404888153076, "learning_rate": 4.835829098553217e-05, "loss": 0.7945, "step": 5880 }, { "epoch": 0.5899734562027346, "grad_norm": 2.2470645904541016, "learning_rate": 4.8352667721109314e-05, "loss": 0.6977, "step": 5890 }, { "epoch": 0.590975108929734, "grad_norm": 4.678529739379883, "learning_rate": 4.834703517068422e-05, "loss": 0.695, "step": 5900 }, { "epoch": 0.5919767616567336, "grad_norm": 3.0645904541015625, "learning_rate": 4.83413933364966e-05, "loss": 0.7326, "step": 5910 }, { "epoch": 0.5929784143837331, "grad_norm": 3.1756017208099365, "learning_rate": 4.833574222078991e-05, "loss": 0.7561, "step": 5920 }, { "epoch": 0.5939800671107327, "grad_norm": 2.3154969215393066, "learning_rate": 4.833008182581127e-05, "loss": 0.7144, "step": 5930 }, { "epoch": 0.5949817198377323, "grad_norm": 3.0473766326904297, "learning_rate": 4.832441215381147e-05, "loss": 0.6792, "step": 5940 }, { "epoch": 0.5959833725647318, "grad_norm": 2.6138970851898193, "learning_rate": 4.8318733207045026e-05, "loss": 0.7414, "step": 5950 }, { "epoch": 0.5969850252917314, "grad_norm": 2.8817873001098633, "learning_rate": 4.831304498777012e-05, "loss": 0.6992, "step": 5960 }, { "epoch": 0.597986678018731, "grad_norm": 2.776374340057373, "learning_rate": 4.830734749824863e-05, "loss": 0.7556, "step": 5970 }, { "epoch": 0.5989883307457304, "grad_norm": 3.001796245574951, "learning_rate": 4.830164074074612e-05, "loss": 0.7315, "step": 5980 }, { "epoch": 0.59998998347273, "grad_norm": 2.222621202468872, "learning_rate": 4.8295924717531833e-05, "loss": 0.6979, "step": 5990 }, { "epoch": 0.6009916361997295, "grad_norm": 2.8218321800231934, "learning_rate": 4.82901994308787e-05, "loss": 0.6922, "step": 6000 }, { "epoch": 0.6019932889267291, "grad_norm": 2.5931153297424316, "learning_rate": 4.828446488306333e-05, "loss": 0.6913, "step": 6010 }, { "epoch": 0.6029949416537287, "grad_norm": 2.5786752700805664, "learning_rate": 4.827872107636604e-05, "loss": 0.7638, "step": 6020 }, { "epoch": 0.6039965943807282, "grad_norm": 3.696964740753174, "learning_rate": 4.8272968013070785e-05, "loss": 0.726, "step": 6030 }, { "epoch": 0.6049982471077278, "grad_norm": 3.4040944576263428, "learning_rate": 4.8267205695465236e-05, "loss": 0.712, "step": 6040 }, { "epoch": 0.6059998998347274, "grad_norm": 2.7188923358917236, "learning_rate": 4.8261434125840735e-05, "loss": 0.6717, "step": 6050 }, { "epoch": 0.6070015525617268, "grad_norm": 2.0458145141601562, "learning_rate": 4.825565330649229e-05, "loss": 0.6694, "step": 6060 }, { "epoch": 0.6080032052887264, "grad_norm": 2.1476712226867676, "learning_rate": 4.8249863239718604e-05, "loss": 0.7041, "step": 6070 }, { "epoch": 0.6090048580157259, "grad_norm": 2.831986904144287, "learning_rate": 4.824406392782206e-05, "loss": 0.6909, "step": 6080 }, { "epoch": 0.6100065107427255, "grad_norm": 2.525928497314453, "learning_rate": 4.82382553731087e-05, "loss": 0.6722, "step": 6090 }, { "epoch": 0.6110081634697251, "grad_norm": 2.2550671100616455, "learning_rate": 4.823243757788825e-05, "loss": 0.7381, "step": 6100 }, { "epoch": 0.6120098161967246, "grad_norm": 2.4147746562957764, "learning_rate": 4.822661054447411e-05, "loss": 0.8251, "step": 6110 }, { "epoch": 0.6130114689237242, "grad_norm": 2.8840737342834473, "learning_rate": 4.822077427518335e-05, "loss": 0.7597, "step": 6120 }, { "epoch": 0.6140131216507237, "grad_norm": 2.4112513065338135, "learning_rate": 4.821492877233672e-05, "loss": 0.7331, "step": 6130 }, { "epoch": 0.6150147743777232, "grad_norm": 2.4079248905181885, "learning_rate": 4.8209074038258636e-05, "loss": 0.8029, "step": 6140 }, { "epoch": 0.6160164271047228, "grad_norm": 2.258690595626831, "learning_rate": 4.8203210075277194e-05, "loss": 0.6666, "step": 6150 }, { "epoch": 0.6170180798317223, "grad_norm": 2.800661325454712, "learning_rate": 4.819733688572414e-05, "loss": 0.702, "step": 6160 }, { "epoch": 0.6180197325587219, "grad_norm": 2.231372833251953, "learning_rate": 4.81914544719349e-05, "loss": 0.6428, "step": 6170 }, { "epoch": 0.6190213852857215, "grad_norm": 2.4702677726745605, "learning_rate": 4.818556283624858e-05, "loss": 0.6676, "step": 6180 }, { "epoch": 0.620023038012721, "grad_norm": 2.390310287475586, "learning_rate": 4.817966198100794e-05, "loss": 0.715, "step": 6190 }, { "epoch": 0.6210246907397206, "grad_norm": 2.378634214401245, "learning_rate": 4.81737519085594e-05, "loss": 0.7538, "step": 6200 }, { "epoch": 0.62202634346672, "grad_norm": 2.6578643321990967, "learning_rate": 4.816783262125306e-05, "loss": 0.7171, "step": 6210 }, { "epoch": 0.6230279961937196, "grad_norm": 2.7009196281433105, "learning_rate": 4.816190412144268e-05, "loss": 0.7951, "step": 6220 }, { "epoch": 0.6240296489207192, "grad_norm": 3.3209683895111084, "learning_rate": 4.8155966411485676e-05, "loss": 0.7156, "step": 6230 }, { "epoch": 0.6250313016477187, "grad_norm": 2.5062389373779297, "learning_rate": 4.8150019493743125e-05, "loss": 0.7081, "step": 6240 }, { "epoch": 0.6260329543747183, "grad_norm": 2.785179376602173, "learning_rate": 4.8144063370579785e-05, "loss": 0.6611, "step": 6250 }, { "epoch": 0.6270346071017179, "grad_norm": 3.0619423389434814, "learning_rate": 4.8138098044364056e-05, "loss": 0.7037, "step": 6260 }, { "epoch": 0.6280362598287174, "grad_norm": 3.4450790882110596, "learning_rate": 4.8132123517467995e-05, "loss": 0.7167, "step": 6270 }, { "epoch": 0.629037912555717, "grad_norm": 3.5104589462280273, "learning_rate": 4.8126139792267334e-05, "loss": 0.6591, "step": 6280 }, { "epoch": 0.6300395652827164, "grad_norm": 2.983975410461426, "learning_rate": 4.812014687114145e-05, "loss": 0.6971, "step": 6290 }, { "epoch": 0.631041218009716, "grad_norm": 3.098531484603882, "learning_rate": 4.811414475647337e-05, "loss": 0.6375, "step": 6300 }, { "epoch": 0.6320428707367156, "grad_norm": 2.6577069759368896, "learning_rate": 4.81081334506498e-05, "loss": 0.7211, "step": 6310 }, { "epoch": 0.6330445234637151, "grad_norm": 2.649841547012329, "learning_rate": 4.8102112956061105e-05, "loss": 0.6979, "step": 6320 }, { "epoch": 0.6340461761907147, "grad_norm": 3.851295232772827, "learning_rate": 4.809608327510125e-05, "loss": 0.7051, "step": 6330 }, { "epoch": 0.6350478289177143, "grad_norm": 2.3493716716766357, "learning_rate": 4.8090044410167914e-05, "loss": 0.7381, "step": 6340 }, { "epoch": 0.6360494816447138, "grad_norm": 2.7770872116088867, "learning_rate": 4.8083996363662386e-05, "loss": 0.7313, "step": 6350 }, { "epoch": 0.6370511343717133, "grad_norm": 2.8262245655059814, "learning_rate": 4.8077939137989645e-05, "loss": 0.736, "step": 6360 }, { "epoch": 0.6380527870987128, "grad_norm": 2.4160664081573486, "learning_rate": 4.807187273555828e-05, "loss": 0.6895, "step": 6370 }, { "epoch": 0.6390544398257124, "grad_norm": 2.7470526695251465, "learning_rate": 4.8065797158780556e-05, "loss": 0.7013, "step": 6380 }, { "epoch": 0.640056092552712, "grad_norm": 2.770486354827881, "learning_rate": 4.8059712410072376e-05, "loss": 0.8161, "step": 6390 }, { "epoch": 0.6410577452797115, "grad_norm": 2.5913543701171875, "learning_rate": 4.80536184918533e-05, "loss": 0.6284, "step": 6400 }, { "epoch": 0.6420593980067111, "grad_norm": 2.855409860610962, "learning_rate": 4.804751540654651e-05, "loss": 0.695, "step": 6410 }, { "epoch": 0.6430610507337107, "grad_norm": 3.0769102573394775, "learning_rate": 4.8041403156578864e-05, "loss": 0.7523, "step": 6420 }, { "epoch": 0.6440627034607102, "grad_norm": 3.437361240386963, "learning_rate": 4.803528174438084e-05, "loss": 0.7152, "step": 6430 }, { "epoch": 0.6450643561877097, "grad_norm": 3.7562596797943115, "learning_rate": 4.802915117238657e-05, "loss": 0.7299, "step": 6440 }, { "epoch": 0.6460660089147092, "grad_norm": 2.92394757270813, "learning_rate": 4.8023011443033835e-05, "loss": 0.7394, "step": 6450 }, { "epoch": 0.6470676616417088, "grad_norm": 2.9074056148529053, "learning_rate": 4.8016862558764034e-05, "loss": 0.7502, "step": 6460 }, { "epoch": 0.6480693143687084, "grad_norm": 2.7694075107574463, "learning_rate": 4.801070452202224e-05, "loss": 0.7541, "step": 6470 }, { "epoch": 0.6490709670957079, "grad_norm": 3.2710673809051514, "learning_rate": 4.800453733525714e-05, "loss": 0.6723, "step": 6480 }, { "epoch": 0.6500726198227075, "grad_norm": 2.673231601715088, "learning_rate": 4.7998361000921055e-05, "loss": 0.7593, "step": 6490 }, { "epoch": 0.6510742725497071, "grad_norm": 2.528330087661743, "learning_rate": 4.7992175521469975e-05, "loss": 0.7758, "step": 6500 }, { "epoch": 0.6520759252767065, "grad_norm": 2.416775703430176, "learning_rate": 4.798598089936349e-05, "loss": 0.7003, "step": 6510 }, { "epoch": 0.6530775780037061, "grad_norm": 3.5243756771087646, "learning_rate": 4.7979777137064854e-05, "loss": 0.6824, "step": 6520 }, { "epoch": 0.6540792307307056, "grad_norm": 2.3242297172546387, "learning_rate": 4.7973564237040936e-05, "loss": 0.6347, "step": 6530 }, { "epoch": 0.6550808834577052, "grad_norm": 2.6728193759918213, "learning_rate": 4.7967342201762244e-05, "loss": 0.6672, "step": 6540 }, { "epoch": 0.6560825361847048, "grad_norm": 2.3104145526885986, "learning_rate": 4.7961111033702933e-05, "loss": 0.6489, "step": 6550 }, { "epoch": 0.6570841889117043, "grad_norm": 2.7398927211761475, "learning_rate": 4.795487073534077e-05, "loss": 0.6653, "step": 6560 }, { "epoch": 0.6580858416387039, "grad_norm": 3.224385976791382, "learning_rate": 4.794862130915716e-05, "loss": 0.6914, "step": 6570 }, { "epoch": 0.6590874943657035, "grad_norm": 2.406184196472168, "learning_rate": 4.794236275763714e-05, "loss": 0.6799, "step": 6580 }, { "epoch": 0.6600891470927029, "grad_norm": 3.6518681049346924, "learning_rate": 4.793609508326936e-05, "loss": 0.7738, "step": 6590 }, { "epoch": 0.6610907998197025, "grad_norm": 3.039585828781128, "learning_rate": 4.7929818288546136e-05, "loss": 0.641, "step": 6600 }, { "epoch": 0.662092452546702, "grad_norm": 3.637732744216919, "learning_rate": 4.792353237596336e-05, "loss": 0.7518, "step": 6610 }, { "epoch": 0.6630941052737016, "grad_norm": 2.7439217567443848, "learning_rate": 4.7917237348020594e-05, "loss": 0.6375, "step": 6620 }, { "epoch": 0.6640957580007012, "grad_norm": 2.8755624294281006, "learning_rate": 4.7910933207220985e-05, "loss": 0.6667, "step": 6630 }, { "epoch": 0.6650974107277007, "grad_norm": 2.3648972511291504, "learning_rate": 4.790461995607135e-05, "loss": 0.7319, "step": 6640 }, { "epoch": 0.6660990634547003, "grad_norm": 2.573380470275879, "learning_rate": 4.789829759708209e-05, "loss": 0.7848, "step": 6650 }, { "epoch": 0.6671007161816999, "grad_norm": 2.4021103382110596, "learning_rate": 4.789196613276723e-05, "loss": 0.6436, "step": 6660 }, { "epoch": 0.6681023689086993, "grad_norm": 2.582141399383545, "learning_rate": 4.7885625565644444e-05, "loss": 0.7283, "step": 6670 }, { "epoch": 0.6691040216356989, "grad_norm": 2.6833086013793945, "learning_rate": 4.7879275898235e-05, "loss": 0.6683, "step": 6680 }, { "epoch": 0.6701056743626984, "grad_norm": 2.5321731567382812, "learning_rate": 4.787291713306379e-05, "loss": 0.7005, "step": 6690 }, { "epoch": 0.671107327089698, "grad_norm": 2.673696279525757, "learning_rate": 4.786654927265933e-05, "loss": 0.7181, "step": 6700 }, { "epoch": 0.6721089798166976, "grad_norm": 2.5562915802001953, "learning_rate": 4.7860172319553753e-05, "loss": 0.6797, "step": 6710 }, { "epoch": 0.6731106325436971, "grad_norm": 2.444516897201538, "learning_rate": 4.78537862762828e-05, "loss": 0.6879, "step": 6720 }, { "epoch": 0.6741122852706967, "grad_norm": 2.7865781784057617, "learning_rate": 4.7847391145385834e-05, "loss": 0.6981, "step": 6730 }, { "epoch": 0.6751139379976961, "grad_norm": 2.3912670612335205, "learning_rate": 4.784098692940582e-05, "loss": 0.6845, "step": 6740 }, { "epoch": 0.6761155907246957, "grad_norm": 3.1096134185791016, "learning_rate": 4.7834573630889333e-05, "loss": 0.7959, "step": 6750 }, { "epoch": 0.6771172434516953, "grad_norm": 2.7713048458099365, "learning_rate": 4.78281512523866e-05, "loss": 0.7643, "step": 6760 }, { "epoch": 0.6781188961786948, "grad_norm": 2.5605435371398926, "learning_rate": 4.782171979645141e-05, "loss": 0.6974, "step": 6770 }, { "epoch": 0.6791205489056944, "grad_norm": 2.641727924346924, "learning_rate": 4.7815279265641186e-05, "loss": 0.7517, "step": 6780 }, { "epoch": 0.680122201632694, "grad_norm": 2.469717502593994, "learning_rate": 4.780882966251694e-05, "loss": 0.7026, "step": 6790 }, { "epoch": 0.6811238543596935, "grad_norm": 2.858966827392578, "learning_rate": 4.7802370989643324e-05, "loss": 0.7318, "step": 6800 }, { "epoch": 0.6821255070866931, "grad_norm": 2.377610206604004, "learning_rate": 4.779590324958857e-05, "loss": 0.7191, "step": 6810 }, { "epoch": 0.6831271598136925, "grad_norm": 2.691115379333496, "learning_rate": 4.7789426444924525e-05, "loss": 0.7618, "step": 6820 }, { "epoch": 0.6841288125406921, "grad_norm": 2.288198947906494, "learning_rate": 4.778294057822663e-05, "loss": 0.7858, "step": 6830 }, { "epoch": 0.6851304652676917, "grad_norm": 2.3845489025115967, "learning_rate": 4.7776445652073944e-05, "loss": 0.7049, "step": 6840 }, { "epoch": 0.6861321179946912, "grad_norm": 2.66279673576355, "learning_rate": 4.776994166904913e-05, "loss": 0.7543, "step": 6850 }, { "epoch": 0.6871337707216908, "grad_norm": 2.5185487270355225, "learning_rate": 4.776342863173844e-05, "loss": 0.7063, "step": 6860 }, { "epoch": 0.6881354234486904, "grad_norm": 2.798473834991455, "learning_rate": 4.775690654273172e-05, "loss": 0.7993, "step": 6870 }, { "epoch": 0.6891370761756899, "grad_norm": 4.163158893585205, "learning_rate": 4.775037540462245e-05, "loss": 0.6505, "step": 6880 }, { "epoch": 0.6901387289026895, "grad_norm": 2.7145819664001465, "learning_rate": 4.774383522000766e-05, "loss": 0.7308, "step": 6890 }, { "epoch": 0.6911403816296889, "grad_norm": 2.461956024169922, "learning_rate": 4.7737285991488027e-05, "loss": 0.6925, "step": 6900 }, { "epoch": 0.6921420343566885, "grad_norm": 2.0583384037017822, "learning_rate": 4.7730727721667776e-05, "loss": 0.6763, "step": 6910 }, { "epoch": 0.6931436870836881, "grad_norm": 3.1688992977142334, "learning_rate": 4.7724160413154764e-05, "loss": 0.6261, "step": 6920 }, { "epoch": 0.6941453398106876, "grad_norm": 3.163015604019165, "learning_rate": 4.771758406856043e-05, "loss": 0.6836, "step": 6930 }, { "epoch": 0.6951469925376872, "grad_norm": 2.387488842010498, "learning_rate": 4.7710998690499794e-05, "loss": 0.7193, "step": 6940 }, { "epoch": 0.6961486452646868, "grad_norm": 3.6488964557647705, "learning_rate": 4.770440428159149e-05, "loss": 0.6738, "step": 6950 }, { "epoch": 0.6971502979916863, "grad_norm": 2.8245675563812256, "learning_rate": 4.769780084445773e-05, "loss": 0.7247, "step": 6960 }, { "epoch": 0.6981519507186859, "grad_norm": 2.353736162185669, "learning_rate": 4.769118838172432e-05, "loss": 0.6718, "step": 6970 }, { "epoch": 0.6991536034456853, "grad_norm": 2.815807819366455, "learning_rate": 4.7684566896020645e-05, "loss": 0.7584, "step": 6980 }, { "epoch": 0.7001552561726849, "grad_norm": 3.2099483013153076, "learning_rate": 4.767793638997969e-05, "loss": 0.6991, "step": 6990 }, { "epoch": 0.7011569088996845, "grad_norm": 2.4797520637512207, "learning_rate": 4.7671296866238025e-05, "loss": 0.7686, "step": 7000 }, { "epoch": 0.702158561626684, "grad_norm": 2.62703275680542, "learning_rate": 4.76646483274358e-05, "loss": 0.7068, "step": 7010 }, { "epoch": 0.7031602143536836, "grad_norm": 2.9379310607910156, "learning_rate": 4.765799077621677e-05, "loss": 0.696, "step": 7020 }, { "epoch": 0.7041618670806832, "grad_norm": 2.635833740234375, "learning_rate": 4.765132421522823e-05, "loss": 0.6948, "step": 7030 }, { "epoch": 0.7051635198076827, "grad_norm": 2.5286707878112793, "learning_rate": 4.7644648647121096e-05, "loss": 0.6741, "step": 7040 }, { "epoch": 0.7061651725346823, "grad_norm": 2.699763059616089, "learning_rate": 4.7637964074549865e-05, "loss": 0.721, "step": 7050 }, { "epoch": 0.7071668252616817, "grad_norm": 2.9536404609680176, "learning_rate": 4.763127050017259e-05, "loss": 0.7539, "step": 7060 }, { "epoch": 0.7081684779886813, "grad_norm": 1.9956881999969482, "learning_rate": 4.762456792665093e-05, "loss": 0.6176, "step": 7070 }, { "epoch": 0.7091701307156809, "grad_norm": 2.333440065383911, "learning_rate": 4.761785635665009e-05, "loss": 0.7214, "step": 7080 }, { "epoch": 0.7101717834426804, "grad_norm": 2.3856465816497803, "learning_rate": 4.76111357928389e-05, "loss": 0.681, "step": 7090 }, { "epoch": 0.71117343616968, "grad_norm": 2.928725242614746, "learning_rate": 4.760440623788972e-05, "loss": 0.738, "step": 7100 }, { "epoch": 0.7121750888966796, "grad_norm": 2.650521993637085, "learning_rate": 4.75976676944785e-05, "loss": 0.6664, "step": 7110 }, { "epoch": 0.7131767416236791, "grad_norm": 2.572051525115967, "learning_rate": 4.7590920165284785e-05, "loss": 0.7096, "step": 7120 }, { "epoch": 0.7141783943506786, "grad_norm": 2.118530750274658, "learning_rate": 4.758416365299166e-05, "loss": 0.7019, "step": 7130 }, { "epoch": 0.7151800470776781, "grad_norm": 2.7526917457580566, "learning_rate": 4.757739816028581e-05, "loss": 0.6642, "step": 7140 }, { "epoch": 0.7161816998046777, "grad_norm": 2.8893725872039795, "learning_rate": 4.757062368985748e-05, "loss": 0.685, "step": 7150 }, { "epoch": 0.7171833525316773, "grad_norm": 2.5351388454437256, "learning_rate": 4.756384024440047e-05, "loss": 0.6915, "step": 7160 }, { "epoch": 0.7181850052586768, "grad_norm": 2.42851185798645, "learning_rate": 4.7557047826612176e-05, "loss": 0.7623, "step": 7170 }, { "epoch": 0.7191866579856764, "grad_norm": 3.844846725463867, "learning_rate": 4.7550246439193546e-05, "loss": 0.7131, "step": 7180 }, { "epoch": 0.7201883107126759, "grad_norm": 2.4313747882843018, "learning_rate": 4.7543436084849094e-05, "loss": 0.6911, "step": 7190 }, { "epoch": 0.7211899634396755, "grad_norm": 2.404658317565918, "learning_rate": 4.7536616766286915e-05, "loss": 0.6961, "step": 7200 }, { "epoch": 0.722191616166675, "grad_norm": 2.1430392265319824, "learning_rate": 4.752978848621863e-05, "loss": 0.7149, "step": 7210 }, { "epoch": 0.7231932688936745, "grad_norm": 2.6081392765045166, "learning_rate": 4.7522951247359484e-05, "loss": 0.7602, "step": 7220 }, { "epoch": 0.7241949216206741, "grad_norm": 2.632668972015381, "learning_rate": 4.751610505242822e-05, "loss": 0.6545, "step": 7230 }, { "epoch": 0.7251965743476737, "grad_norm": 2.9545376300811768, "learning_rate": 4.750924990414719e-05, "loss": 0.7311, "step": 7240 }, { "epoch": 0.7261982270746732, "grad_norm": 2.353555679321289, "learning_rate": 4.7502385805242286e-05, "loss": 0.6705, "step": 7250 }, { "epoch": 0.7271998798016728, "grad_norm": 2.0468590259552, "learning_rate": 4.749551275844297e-05, "loss": 0.6565, "step": 7260 }, { "epoch": 0.7282015325286723, "grad_norm": 2.693467855453491, "learning_rate": 4.748863076648224e-05, "loss": 0.7045, "step": 7270 }, { "epoch": 0.7292031852556718, "grad_norm": 2.921161651611328, "learning_rate": 4.748173983209667e-05, "loss": 0.6481, "step": 7280 }, { "epoch": 0.7302048379826714, "grad_norm": 2.5260236263275146, "learning_rate": 4.74748399580264e-05, "loss": 0.6944, "step": 7290 }, { "epoch": 0.7312064907096709, "grad_norm": 2.695580244064331, "learning_rate": 4.746793114701508e-05, "loss": 0.7933, "step": 7300 }, { "epoch": 0.7322081434366705, "grad_norm": 1.944222092628479, "learning_rate": 4.7461013401809974e-05, "loss": 0.683, "step": 7310 }, { "epoch": 0.7332097961636701, "grad_norm": 2.3878674507141113, "learning_rate": 4.745408672516184e-05, "loss": 0.6057, "step": 7320 }, { "epoch": 0.7342114488906696, "grad_norm": 2.2860217094421387, "learning_rate": 4.744715111982504e-05, "loss": 0.6008, "step": 7330 }, { "epoch": 0.7352131016176692, "grad_norm": 2.850374937057495, "learning_rate": 4.744020658855745e-05, "loss": 0.6024, "step": 7340 }, { "epoch": 0.7362147543446687, "grad_norm": 2.4920895099639893, "learning_rate": 4.7433253134120515e-05, "loss": 0.7168, "step": 7350 }, { "epoch": 0.7372164070716682, "grad_norm": 2.360792875289917, "learning_rate": 4.742629075927921e-05, "loss": 0.6677, "step": 7360 }, { "epoch": 0.7382180597986678, "grad_norm": 2.3476812839508057, "learning_rate": 4.7419319466802074e-05, "loss": 0.7269, "step": 7370 }, { "epoch": 0.7392197125256673, "grad_norm": 2.4540038108825684, "learning_rate": 4.7412339259461194e-05, "loss": 0.7336, "step": 7380 }, { "epoch": 0.7402213652526669, "grad_norm": 2.768291473388672, "learning_rate": 4.740535014003218e-05, "loss": 0.6893, "step": 7390 }, { "epoch": 0.7412230179796665, "grad_norm": 2.2983901500701904, "learning_rate": 4.73983521112942e-05, "loss": 0.6626, "step": 7400 }, { "epoch": 0.742224670706666, "grad_norm": 3.18436336517334, "learning_rate": 4.739134517602998e-05, "loss": 0.7355, "step": 7410 }, { "epoch": 0.7432263234336656, "grad_norm": 2.746232748031616, "learning_rate": 4.738432933702575e-05, "loss": 0.6544, "step": 7420 }, { "epoch": 0.744227976160665, "grad_norm": 2.6315596103668213, "learning_rate": 4.737730459707132e-05, "loss": 0.6887, "step": 7430 }, { "epoch": 0.7452296288876646, "grad_norm": 2.708827495574951, "learning_rate": 4.737027095896002e-05, "loss": 0.6689, "step": 7440 }, { "epoch": 0.7462312816146642, "grad_norm": 2.332124948501587, "learning_rate": 4.73632284254887e-05, "loss": 0.6914, "step": 7450 }, { "epoch": 0.7472329343416637, "grad_norm": 2.743593692779541, "learning_rate": 4.73561769994578e-05, "loss": 0.7856, "step": 7460 }, { "epoch": 0.7482345870686633, "grad_norm": 2.3313615322113037, "learning_rate": 4.7349116683671247e-05, "loss": 0.6917, "step": 7470 }, { "epoch": 0.7492362397956629, "grad_norm": 2.5874178409576416, "learning_rate": 4.7342047480936516e-05, "loss": 0.6347, "step": 7480 }, { "epoch": 0.7502378925226624, "grad_norm": 2.8353426456451416, "learning_rate": 4.733496939406462e-05, "loss": 0.7379, "step": 7490 }, { "epoch": 0.751239545249662, "grad_norm": 3.2078945636749268, "learning_rate": 4.73278824258701e-05, "loss": 0.7179, "step": 7500 }, { "epoch": 0.7522411979766614, "grad_norm": 2.3015921115875244, "learning_rate": 4.732078657917105e-05, "loss": 0.745, "step": 7510 }, { "epoch": 0.753242850703661, "grad_norm": 2.150043487548828, "learning_rate": 4.7313681856789054e-05, "loss": 0.5916, "step": 7520 }, { "epoch": 0.7542445034306606, "grad_norm": 2.9892406463623047, "learning_rate": 4.7306568261549264e-05, "loss": 0.7063, "step": 7530 }, { "epoch": 0.7552461561576601, "grad_norm": 2.488659143447876, "learning_rate": 4.7299445796280345e-05, "loss": 0.7205, "step": 7540 }, { "epoch": 0.7562478088846597, "grad_norm": 2.3893909454345703, "learning_rate": 4.729231446381448e-05, "loss": 0.7307, "step": 7550 }, { "epoch": 0.7572494616116593, "grad_norm": 2.462712049484253, "learning_rate": 4.7285174266987395e-05, "loss": 0.6747, "step": 7560 }, { "epoch": 0.7582511143386588, "grad_norm": 3.0485661029815674, "learning_rate": 4.727802520863832e-05, "loss": 0.6703, "step": 7570 }, { "epoch": 0.7592527670656584, "grad_norm": 3.7530717849731445, "learning_rate": 4.727086729161003e-05, "loss": 0.689, "step": 7580 }, { "epoch": 0.7602544197926578, "grad_norm": 2.519653081893921, "learning_rate": 4.726370051874882e-05, "loss": 0.7283, "step": 7590 }, { "epoch": 0.7612560725196574, "grad_norm": 2.17928409576416, "learning_rate": 4.725652489290449e-05, "loss": 0.6723, "step": 7600 }, { "epoch": 0.762257725246657, "grad_norm": 2.4222123622894287, "learning_rate": 4.724934041693036e-05, "loss": 0.6834, "step": 7610 }, { "epoch": 0.7632593779736565, "grad_norm": 2.6158435344696045, "learning_rate": 4.724214709368331e-05, "loss": 0.5397, "step": 7620 }, { "epoch": 0.7642610307006561, "grad_norm": 2.603689193725586, "learning_rate": 4.723494492602368e-05, "loss": 0.7108, "step": 7630 }, { "epoch": 0.7652626834276556, "grad_norm": 2.116384744644165, "learning_rate": 4.722773391681536e-05, "loss": 0.6177, "step": 7640 }, { "epoch": 0.7662643361546552, "grad_norm": 2.3073859214782715, "learning_rate": 4.722051406892577e-05, "loss": 0.713, "step": 7650 }, { "epoch": 0.7672659888816548, "grad_norm": 2.8427605628967285, "learning_rate": 4.7213285385225803e-05, "loss": 0.7059, "step": 7660 }, { "epoch": 0.7682676416086542, "grad_norm": 3.5749852657318115, "learning_rate": 4.72060478685899e-05, "loss": 0.7576, "step": 7670 }, { "epoch": 0.7692692943356538, "grad_norm": 2.713200569152832, "learning_rate": 4.7198801521895985e-05, "loss": 0.7588, "step": 7680 }, { "epoch": 0.7702709470626534, "grad_norm": 3.1419548988342285, "learning_rate": 4.7191546348025526e-05, "loss": 0.7121, "step": 7690 }, { "epoch": 0.7712725997896529, "grad_norm": 2.1218645572662354, "learning_rate": 4.718428234986348e-05, "loss": 0.6079, "step": 7700 }, { "epoch": 0.7722742525166525, "grad_norm": 2.9028546810150146, "learning_rate": 4.717700953029833e-05, "loss": 0.7087, "step": 7710 }, { "epoch": 0.773275905243652, "grad_norm": 2.609346389770508, "learning_rate": 4.7169727892222023e-05, "loss": 0.6647, "step": 7720 }, { "epoch": 0.7742775579706516, "grad_norm": 2.2379143238067627, "learning_rate": 4.716243743853008e-05, "loss": 0.6019, "step": 7730 }, { "epoch": 0.7752792106976512, "grad_norm": 2.9337775707244873, "learning_rate": 4.7155138172121475e-05, "loss": 0.6523, "step": 7740 }, { "epoch": 0.7762808634246506, "grad_norm": 2.9232068061828613, "learning_rate": 4.7147830095898704e-05, "loss": 0.7737, "step": 7750 }, { "epoch": 0.7772825161516502, "grad_norm": 2.630415916442871, "learning_rate": 4.714051321276776e-05, "loss": 0.6862, "step": 7760 }, { "epoch": 0.7782841688786498, "grad_norm": 2.902622938156128, "learning_rate": 4.7133187525638156e-05, "loss": 0.7157, "step": 7770 }, { "epoch": 0.7792858216056493, "grad_norm": 2.3229422569274902, "learning_rate": 4.7125853037422885e-05, "loss": 0.6656, "step": 7780 }, { "epoch": 0.7802874743326489, "grad_norm": 3.199389934539795, "learning_rate": 4.711850975103844e-05, "loss": 0.6862, "step": 7790 }, { "epoch": 0.7812891270596484, "grad_norm": 2.6107218265533447, "learning_rate": 4.711115766940484e-05, "loss": 0.6955, "step": 7800 }, { "epoch": 0.782290779786648, "grad_norm": 2.342616558074951, "learning_rate": 4.710379679544557e-05, "loss": 0.7143, "step": 7810 }, { "epoch": 0.7832924325136476, "grad_norm": 2.832531213760376, "learning_rate": 4.709642713208762e-05, "loss": 0.6895, "step": 7820 }, { "epoch": 0.784294085240647, "grad_norm": 3.0061943531036377, "learning_rate": 4.7089048682261485e-05, "loss": 0.6991, "step": 7830 }, { "epoch": 0.7852957379676466, "grad_norm": 2.8207926750183105, "learning_rate": 4.7081661448901136e-05, "loss": 0.7095, "step": 7840 }, { "epoch": 0.7862973906946462, "grad_norm": 2.1883671283721924, "learning_rate": 4.707426543494407e-05, "loss": 0.7691, "step": 7850 }, { "epoch": 0.7872990434216457, "grad_norm": 2.096417188644409, "learning_rate": 4.7066860643331226e-05, "loss": 0.6957, "step": 7860 }, { "epoch": 0.7883006961486453, "grad_norm": 2.4886560440063477, "learning_rate": 4.705944707700708e-05, "loss": 0.7012, "step": 7870 }, { "epoch": 0.7893023488756448, "grad_norm": 2.392404079437256, "learning_rate": 4.705202473891957e-05, "loss": 0.6981, "step": 7880 }, { "epoch": 0.7903040016026444, "grad_norm": 1.8706591129302979, "learning_rate": 4.704459363202012e-05, "loss": 0.7211, "step": 7890 }, { "epoch": 0.791305654329644, "grad_norm": 2.4096221923828125, "learning_rate": 4.703715375926367e-05, "loss": 0.6804, "step": 7900 }, { "epoch": 0.7923073070566434, "grad_norm": 2.4984166622161865, "learning_rate": 4.7029705123608604e-05, "loss": 0.6623, "step": 7910 }, { "epoch": 0.793308959783643, "grad_norm": 2.8774752616882324, "learning_rate": 4.7022247728016836e-05, "loss": 0.6281, "step": 7920 }, { "epoch": 0.7943106125106426, "grad_norm": 2.1365795135498047, "learning_rate": 4.7014781575453724e-05, "loss": 0.6289, "step": 7930 }, { "epoch": 0.7953122652376421, "grad_norm": 2.5661661624908447, "learning_rate": 4.700730666888813e-05, "loss": 0.6251, "step": 7940 }, { "epoch": 0.7963139179646417, "grad_norm": 3.090698480606079, "learning_rate": 4.69998230112924e-05, "loss": 0.7738, "step": 7950 }, { "epoch": 0.7973155706916412, "grad_norm": 2.6969759464263916, "learning_rate": 4.699233060564233e-05, "loss": 0.685, "step": 7960 }, { "epoch": 0.7983172234186408, "grad_norm": 2.0235366821289062, "learning_rate": 4.698482945491722e-05, "loss": 0.6514, "step": 7970 }, { "epoch": 0.7993188761456403, "grad_norm": 3.036466360092163, "learning_rate": 4.6977319562099866e-05, "loss": 0.662, "step": 7980 }, { "epoch": 0.8003205288726398, "grad_norm": 2.331421375274658, "learning_rate": 4.696980093017649e-05, "loss": 0.6494, "step": 7990 }, { "epoch": 0.8013221815996394, "grad_norm": 2.20649790763855, "learning_rate": 4.6962273562136826e-05, "loss": 0.6467, "step": 8000 }, { "epoch": 0.802323834326639, "grad_norm": 2.333871603012085, "learning_rate": 4.6954737460974074e-05, "loss": 0.6229, "step": 8010 }, { "epoch": 0.8033254870536385, "grad_norm": 2.6986920833587646, "learning_rate": 4.69471926296849e-05, "loss": 0.6443, "step": 8020 }, { "epoch": 0.8043271397806381, "grad_norm": 2.474196434020996, "learning_rate": 4.6939639071269454e-05, "loss": 0.6341, "step": 8030 }, { "epoch": 0.8053287925076376, "grad_norm": 2.205517053604126, "learning_rate": 4.6932076788731336e-05, "loss": 0.5968, "step": 8040 }, { "epoch": 0.8063304452346371, "grad_norm": 2.337341785430908, "learning_rate": 4.6924505785077645e-05, "loss": 0.6785, "step": 8050 }, { "epoch": 0.8073320979616367, "grad_norm": 3.4576077461242676, "learning_rate": 4.6916926063318914e-05, "loss": 0.7103, "step": 8060 }, { "epoch": 0.8083337506886362, "grad_norm": 2.9819705486297607, "learning_rate": 4.6909337626469155e-05, "loss": 0.6929, "step": 8070 }, { "epoch": 0.8093354034156358, "grad_norm": 2.672879457473755, "learning_rate": 4.6901740477545874e-05, "loss": 0.6572, "step": 8080 }, { "epoch": 0.8103370561426354, "grad_norm": 3.234699010848999, "learning_rate": 4.6894134619569996e-05, "loss": 0.6925, "step": 8090 }, { "epoch": 0.8113387088696349, "grad_norm": 2.5342490673065186, "learning_rate": 4.688652005556593e-05, "loss": 0.6874, "step": 8100 }, { "epoch": 0.8123403615966345, "grad_norm": 4.199151515960693, "learning_rate": 4.687889678856156e-05, "loss": 0.7003, "step": 8110 }, { "epoch": 0.813342014323634, "grad_norm": 2.5348429679870605, "learning_rate": 4.6871264821588214e-05, "loss": 0.6502, "step": 8120 }, { "epoch": 0.8143436670506335, "grad_norm": 2.564166307449341, "learning_rate": 4.686362415768066e-05, "loss": 0.7122, "step": 8130 }, { "epoch": 0.8153453197776331, "grad_norm": 3.15704607963562, "learning_rate": 4.685597479987718e-05, "loss": 0.6718, "step": 8140 }, { "epoch": 0.8163469725046326, "grad_norm": 3.303502082824707, "learning_rate": 4.684831675121947e-05, "loss": 0.6489, "step": 8150 }, { "epoch": 0.8173486252316322, "grad_norm": 2.5538973808288574, "learning_rate": 4.6840650014752675e-05, "loss": 0.6453, "step": 8160 }, { "epoch": 0.8183502779586317, "grad_norm": 2.57222580909729, "learning_rate": 4.683297459352544e-05, "loss": 0.6897, "step": 8170 }, { "epoch": 0.8193519306856313, "grad_norm": 2.3636786937713623, "learning_rate": 4.68252904905898e-05, "loss": 0.6485, "step": 8180 }, { "epoch": 0.8203535834126309, "grad_norm": 2.2532460689544678, "learning_rate": 4.6817597709001314e-05, "loss": 0.7237, "step": 8190 }, { "epoch": 0.8213552361396304, "grad_norm": 2.7108314037323, "learning_rate": 4.680989625181893e-05, "loss": 0.6412, "step": 8200 }, { "epoch": 0.8223568888666299, "grad_norm": 2.512598991394043, "learning_rate": 4.6802186122105084e-05, "loss": 0.6971, "step": 8210 }, { "epoch": 0.8233585415936295, "grad_norm": 2.5848026275634766, "learning_rate": 4.6794467322925636e-05, "loss": 0.6953, "step": 8220 }, { "epoch": 0.824360194320629, "grad_norm": 2.2770142555236816, "learning_rate": 4.678673985734992e-05, "loss": 0.7248, "step": 8230 }, { "epoch": 0.8253618470476286, "grad_norm": 1.9408689737319946, "learning_rate": 4.677900372845069e-05, "loss": 0.6522, "step": 8240 }, { "epoch": 0.8263634997746281, "grad_norm": 1.805517315864563, "learning_rate": 4.677125893930416e-05, "loss": 0.7298, "step": 8250 }, { "epoch": 0.8273651525016277, "grad_norm": 2.5754191875457764, "learning_rate": 4.676350549298998e-05, "loss": 0.6288, "step": 8260 }, { "epoch": 0.8283668052286273, "grad_norm": 2.7753493785858154, "learning_rate": 4.675574339259125e-05, "loss": 0.6635, "step": 8270 }, { "epoch": 0.8293684579556267, "grad_norm": 2.473208427429199, "learning_rate": 4.674797264119451e-05, "loss": 0.6893, "step": 8280 }, { "epoch": 0.8303701106826263, "grad_norm": 5.45206356048584, "learning_rate": 4.674019324188973e-05, "loss": 0.7061, "step": 8290 }, { "epoch": 0.8313717634096259, "grad_norm": 2.2079737186431885, "learning_rate": 4.673240519777033e-05, "loss": 0.7112, "step": 8300 }, { "epoch": 0.8323734161366254, "grad_norm": 2.702819347381592, "learning_rate": 4.6724608511933166e-05, "loss": 0.6834, "step": 8310 }, { "epoch": 0.833375068863625, "grad_norm": 2.0012543201446533, "learning_rate": 4.671680318747852e-05, "loss": 0.6652, "step": 8320 }, { "epoch": 0.8343767215906245, "grad_norm": 2.19185209274292, "learning_rate": 4.670898922751012e-05, "loss": 0.6717, "step": 8330 }, { "epoch": 0.8353783743176241, "grad_norm": 2.6093597412109375, "learning_rate": 4.670116663513514e-05, "loss": 0.7139, "step": 8340 }, { "epoch": 0.8363800270446237, "grad_norm": 2.486466884613037, "learning_rate": 4.6693335413464156e-05, "loss": 0.6394, "step": 8350 }, { "epoch": 0.8373816797716231, "grad_norm": 2.484245777130127, "learning_rate": 4.6685495565611196e-05, "loss": 0.7008, "step": 8360 }, { "epoch": 0.8383833324986227, "grad_norm": 2.261580228805542, "learning_rate": 4.66776470946937e-05, "loss": 0.7157, "step": 8370 }, { "epoch": 0.8393849852256223, "grad_norm": 2.329270362854004, "learning_rate": 4.666979000383257e-05, "loss": 0.6162, "step": 8380 }, { "epoch": 0.8403866379526218, "grad_norm": 2.7080113887786865, "learning_rate": 4.666192429615211e-05, "loss": 0.6352, "step": 8390 }, { "epoch": 0.8413882906796214, "grad_norm": 3.4409689903259277, "learning_rate": 4.665404997478004e-05, "loss": 0.6912, "step": 8400 }, { "epoch": 0.8423899434066209, "grad_norm": 2.870689868927002, "learning_rate": 4.6646167042847545e-05, "loss": 0.6864, "step": 8410 }, { "epoch": 0.8433915961336205, "grad_norm": 3.0154449939727783, "learning_rate": 4.663827550348919e-05, "loss": 0.649, "step": 8420 }, { "epoch": 0.8443932488606201, "grad_norm": 2.5777080059051514, "learning_rate": 4.663037535984299e-05, "loss": 0.6965, "step": 8430 }, { "epoch": 0.8453949015876195, "grad_norm": 2.8088622093200684, "learning_rate": 4.6622466615050386e-05, "loss": 0.5746, "step": 8440 }, { "epoch": 0.8463965543146191, "grad_norm": 2.371058464050293, "learning_rate": 4.6614549272256216e-05, "loss": 0.6209, "step": 8450 }, { "epoch": 0.8473982070416187, "grad_norm": 3.050855875015259, "learning_rate": 4.660662333460874e-05, "loss": 0.7257, "step": 8460 }, { "epoch": 0.8483998597686182, "grad_norm": 2.714768409729004, "learning_rate": 4.659868880525966e-05, "loss": 0.6487, "step": 8470 }, { "epoch": 0.8494015124956178, "grad_norm": 2.225267171859741, "learning_rate": 4.659074568736407e-05, "loss": 0.615, "step": 8480 }, { "epoch": 0.8504031652226173, "grad_norm": 2.1432557106018066, "learning_rate": 4.658279398408049e-05, "loss": 0.6767, "step": 8490 }, { "epoch": 0.8514048179496169, "grad_norm": 2.810725688934326, "learning_rate": 4.657483369857085e-05, "loss": 0.6305, "step": 8500 }, { "epoch": 0.8524064706766165, "grad_norm": 2.351372003555298, "learning_rate": 4.65668648340005e-05, "loss": 0.6995, "step": 8510 }, { "epoch": 0.8534081234036159, "grad_norm": 2.379453420639038, "learning_rate": 4.6558887393538185e-05, "loss": 0.6905, "step": 8520 }, { "epoch": 0.8544097761306155, "grad_norm": 2.038651466369629, "learning_rate": 4.655090138035607e-05, "loss": 0.6281, "step": 8530 }, { "epoch": 0.8554114288576151, "grad_norm": 2.4542579650878906, "learning_rate": 4.654290679762975e-05, "loss": 0.6665, "step": 8540 }, { "epoch": 0.8564130815846146, "grad_norm": 2.596134662628174, "learning_rate": 4.653490364853818e-05, "loss": 0.6725, "step": 8550 }, { "epoch": 0.8574147343116142, "grad_norm": 2.8084211349487305, "learning_rate": 4.652689193626377e-05, "loss": 0.7161, "step": 8560 }, { "epoch": 0.8584163870386137, "grad_norm": 2.6709377765655518, "learning_rate": 4.651887166399229e-05, "loss": 0.7052, "step": 8570 }, { "epoch": 0.8594180397656133, "grad_norm": 2.4577574729919434, "learning_rate": 4.6510842834912966e-05, "loss": 0.6728, "step": 8580 }, { "epoch": 0.8604196924926129, "grad_norm": 3.887608766555786, "learning_rate": 4.650280545221838e-05, "loss": 0.577, "step": 8590 }, { "epoch": 0.8614213452196123, "grad_norm": 2.4187510013580322, "learning_rate": 4.649475951910454e-05, "loss": 0.7278, "step": 8600 }, { "epoch": 0.8624229979466119, "grad_norm": 2.964956760406494, "learning_rate": 4.6486705038770836e-05, "loss": 0.6633, "step": 8610 }, { "epoch": 0.8634246506736114, "grad_norm": 3.0552287101745605, "learning_rate": 4.647864201442008e-05, "loss": 0.6312, "step": 8620 }, { "epoch": 0.864426303400611, "grad_norm": 2.2803544998168945, "learning_rate": 4.647057044925847e-05, "loss": 0.6599, "step": 8630 }, { "epoch": 0.8654279561276106, "grad_norm": 2.118612766265869, "learning_rate": 4.646249034649559e-05, "loss": 0.6478, "step": 8640 }, { "epoch": 0.8664296088546101, "grad_norm": 2.494878053665161, "learning_rate": 4.645440170934443e-05, "loss": 0.7454, "step": 8650 }, { "epoch": 0.8674312615816097, "grad_norm": 3.2278289794921875, "learning_rate": 4.6446304541021384e-05, "loss": 0.6687, "step": 8660 }, { "epoch": 0.8684329143086092, "grad_norm": 3.0382726192474365, "learning_rate": 4.6438198844746216e-05, "loss": 0.7565, "step": 8670 }, { "epoch": 0.8694345670356087, "grad_norm": 3.102384328842163, "learning_rate": 4.643008462374209e-05, "loss": 0.6778, "step": 8680 }, { "epoch": 0.8704362197626083, "grad_norm": 3.0376174449920654, "learning_rate": 4.6421961881235565e-05, "loss": 0.6931, "step": 8690 }, { "epoch": 0.8714378724896078, "grad_norm": 2.6684823036193848, "learning_rate": 4.641383062045659e-05, "loss": 0.602, "step": 8700 }, { "epoch": 0.8724395252166074, "grad_norm": 2.0593066215515137, "learning_rate": 4.640569084463849e-05, "loss": 0.6166, "step": 8710 }, { "epoch": 0.873441177943607, "grad_norm": 2.141124725341797, "learning_rate": 4.639754255701798e-05, "loss": 0.5958, "step": 8720 }, { "epoch": 0.8744428306706065, "grad_norm": 2.7273616790771484, "learning_rate": 4.638938576083517e-05, "loss": 0.5938, "step": 8730 }, { "epoch": 0.875444483397606, "grad_norm": 2.745358467102051, "learning_rate": 4.638122045933353e-05, "loss": 0.6457, "step": 8740 }, { "epoch": 0.8764461361246056, "grad_norm": 2.2381041049957275, "learning_rate": 4.637304665575994e-05, "loss": 0.6258, "step": 8750 }, { "epoch": 0.8774477888516051, "grad_norm": 2.4562036991119385, "learning_rate": 4.6364864353364645e-05, "loss": 0.6121, "step": 8760 }, { "epoch": 0.8784494415786047, "grad_norm": 1.8846955299377441, "learning_rate": 4.6356673555401274e-05, "loss": 0.6452, "step": 8770 }, { "epoch": 0.8794510943056042, "grad_norm": 1.8423177003860474, "learning_rate": 4.6348474265126836e-05, "loss": 0.6773, "step": 8780 }, { "epoch": 0.8804527470326038, "grad_norm": 2.811655044555664, "learning_rate": 4.63402664858017e-05, "loss": 0.6714, "step": 8790 }, { "epoch": 0.8814543997596034, "grad_norm": 2.5115389823913574, "learning_rate": 4.633205022068963e-05, "loss": 0.6618, "step": 8800 }, { "epoch": 0.8824560524866029, "grad_norm": 2.644765615463257, "learning_rate": 4.632382547305777e-05, "loss": 0.6575, "step": 8810 }, { "epoch": 0.8834577052136025, "grad_norm": 2.793731212615967, "learning_rate": 4.6315592246176606e-05, "loss": 0.6772, "step": 8820 }, { "epoch": 0.884459357940602, "grad_norm": 1.6667630672454834, "learning_rate": 4.630735054332003e-05, "loss": 0.6543, "step": 8830 }, { "epoch": 0.8854610106676015, "grad_norm": 3.318857192993164, "learning_rate": 4.629910036776528e-05, "loss": 0.668, "step": 8840 }, { "epoch": 0.8864626633946011, "grad_norm": 2.305460214614868, "learning_rate": 4.629084172279298e-05, "loss": 0.6429, "step": 8850 }, { "epoch": 0.8874643161216006, "grad_norm": 2.154136896133423, "learning_rate": 4.628257461168711e-05, "loss": 0.6032, "step": 8860 }, { "epoch": 0.8884659688486002, "grad_norm": 2.2056803703308105, "learning_rate": 4.627429903773502e-05, "loss": 0.6624, "step": 8870 }, { "epoch": 0.8894676215755998, "grad_norm": 2.4072306156158447, "learning_rate": 4.626601500422743e-05, "loss": 0.689, "step": 8880 }, { "epoch": 0.8904692743025993, "grad_norm": 2.359260320663452, "learning_rate": 4.6257722514458426e-05, "loss": 0.6717, "step": 8890 }, { "epoch": 0.8914709270295988, "grad_norm": 2.8342933654785156, "learning_rate": 4.624942157172543e-05, "loss": 0.6374, "step": 8900 }, { "epoch": 0.8924725797565984, "grad_norm": 2.6110384464263916, "learning_rate": 4.624111217932926e-05, "loss": 0.6617, "step": 8910 }, { "epoch": 0.8934742324835979, "grad_norm": 2.4755423069000244, "learning_rate": 4.623279434057408e-05, "loss": 0.6272, "step": 8920 }, { "epoch": 0.8944758852105975, "grad_norm": 1.939832091331482, "learning_rate": 4.6224468058767414e-05, "loss": 0.6798, "step": 8930 }, { "epoch": 0.895477537937597, "grad_norm": 2.495572566986084, "learning_rate": 4.621613333722013e-05, "loss": 0.6241, "step": 8940 }, { "epoch": 0.8964791906645966, "grad_norm": 1.9067161083221436, "learning_rate": 4.620779017924648e-05, "loss": 0.5824, "step": 8950 }, { "epoch": 0.8974808433915962, "grad_norm": 2.223557472229004, "learning_rate": 4.619943858816403e-05, "loss": 0.5958, "step": 8960 }, { "epoch": 0.8984824961185957, "grad_norm": 2.710803747177124, "learning_rate": 4.619107856729376e-05, "loss": 0.6501, "step": 8970 }, { "epoch": 0.8994841488455952, "grad_norm": 2.848140001296997, "learning_rate": 4.618271011995994e-05, "loss": 0.6274, "step": 8980 }, { "epoch": 0.9004858015725948, "grad_norm": 2.5017592906951904, "learning_rate": 4.617433324949021e-05, "loss": 0.6688, "step": 8990 }, { "epoch": 0.9014874542995943, "grad_norm": 2.8840415477752686, "learning_rate": 4.616594795921558e-05, "loss": 0.6324, "step": 9000 }, { "epoch": 0.9024891070265939, "grad_norm": 2.0160205364227295, "learning_rate": 4.61575542524704e-05, "loss": 0.6435, "step": 9010 }, { "epoch": 0.9034907597535934, "grad_norm": 2.5726449489593506, "learning_rate": 4.6149152132592346e-05, "loss": 0.6423, "step": 9020 }, { "epoch": 0.904492412480593, "grad_norm": 2.614830493927002, "learning_rate": 4.6140741602922466e-05, "loss": 0.5871, "step": 9030 }, { "epoch": 0.9054940652075926, "grad_norm": 2.377392530441284, "learning_rate": 4.6132322666805125e-05, "loss": 0.6431, "step": 9040 }, { "epoch": 0.906495717934592, "grad_norm": 3.235520839691162, "learning_rate": 4.612389532758806e-05, "loss": 0.7138, "step": 9050 }, { "epoch": 0.9074973706615916, "grad_norm": 2.785496473312378, "learning_rate": 4.611545958862233e-05, "loss": 0.6979, "step": 9060 }, { "epoch": 0.9084990233885911, "grad_norm": 2.0526037216186523, "learning_rate": 4.610701545326234e-05, "loss": 0.6125, "step": 9070 }, { "epoch": 0.9095006761155907, "grad_norm": 2.4618897438049316, "learning_rate": 4.609856292486583e-05, "loss": 0.6538, "step": 9080 }, { "epoch": 0.9105023288425903, "grad_norm": 2.852219343185425, "learning_rate": 4.6090102006793875e-05, "loss": 0.5938, "step": 9090 }, { "epoch": 0.9115039815695898, "grad_norm": 2.0149478912353516, "learning_rate": 4.608163270241092e-05, "loss": 0.6686, "step": 9100 }, { "epoch": 0.9125056342965894, "grad_norm": 3.2436070442199707, "learning_rate": 4.6073155015084676e-05, "loss": 0.7302, "step": 9110 }, { "epoch": 0.913507287023589, "grad_norm": 3.005521059036255, "learning_rate": 4.606466894818625e-05, "loss": 0.6218, "step": 9120 }, { "epoch": 0.9145089397505884, "grad_norm": 2.209559679031372, "learning_rate": 4.6056174505090066e-05, "loss": 0.7016, "step": 9130 }, { "epoch": 0.915510592477588, "grad_norm": 1.9661232233047485, "learning_rate": 4.604767168917386e-05, "loss": 0.6632, "step": 9140 }, { "epoch": 0.9165122452045875, "grad_norm": 2.880694627761841, "learning_rate": 4.603916050381871e-05, "loss": 0.6389, "step": 9150 }, { "epoch": 0.9175138979315871, "grad_norm": 2.2712185382843018, "learning_rate": 4.603064095240902e-05, "loss": 0.64, "step": 9160 }, { "epoch": 0.9185155506585867, "grad_norm": 2.218029022216797, "learning_rate": 4.6022113038332534e-05, "loss": 0.5949, "step": 9170 }, { "epoch": 0.9195172033855862, "grad_norm": 2.7205657958984375, "learning_rate": 4.6013576764980293e-05, "loss": 0.6492, "step": 9180 }, { "epoch": 0.9205188561125858, "grad_norm": 2.42616605758667, "learning_rate": 4.6005032135746684e-05, "loss": 0.5645, "step": 9190 }, { "epoch": 0.9215205088395854, "grad_norm": 2.109893560409546, "learning_rate": 4.5996479154029406e-05, "loss": 0.7036, "step": 9200 }, { "epoch": 0.9225221615665848, "grad_norm": 2.179466485977173, "learning_rate": 4.59879178232295e-05, "loss": 0.6462, "step": 9210 }, { "epoch": 0.9235238142935844, "grad_norm": 2.5310440063476562, "learning_rate": 4.597934814675129e-05, "loss": 0.6169, "step": 9220 }, { "epoch": 0.9245254670205839, "grad_norm": 3.850490093231201, "learning_rate": 4.597077012800245e-05, "loss": 0.7126, "step": 9230 }, { "epoch": 0.9255271197475835, "grad_norm": 1.9445955753326416, "learning_rate": 4.596218377039397e-05, "loss": 0.6866, "step": 9240 }, { "epoch": 0.9265287724745831, "grad_norm": 2.2818901538848877, "learning_rate": 4.595358907734013e-05, "loss": 0.6533, "step": 9250 }, { "epoch": 0.9275304252015826, "grad_norm": 3.0567245483398438, "learning_rate": 4.5944986052258555e-05, "loss": 0.6954, "step": 9260 }, { "epoch": 0.9285320779285822, "grad_norm": 2.6508970260620117, "learning_rate": 4.5936374698570154e-05, "loss": 0.6175, "step": 9270 }, { "epoch": 0.9295337306555818, "grad_norm": 2.2970590591430664, "learning_rate": 4.5927755019699175e-05, "loss": 0.5485, "step": 9280 }, { "epoch": 0.9305353833825812, "grad_norm": 2.39508056640625, "learning_rate": 4.591912701907316e-05, "loss": 0.6005, "step": 9290 }, { "epoch": 0.9315370361095808, "grad_norm": 2.541376829147339, "learning_rate": 4.591049070012297e-05, "loss": 0.6507, "step": 9300 }, { "epoch": 0.9325386888365803, "grad_norm": 2.181727647781372, "learning_rate": 4.590184606628276e-05, "loss": 0.6837, "step": 9310 }, { "epoch": 0.9335403415635799, "grad_norm": 2.3271026611328125, "learning_rate": 4.589319312099001e-05, "loss": 0.6608, "step": 9320 }, { "epoch": 0.9345419942905795, "grad_norm": 2.447262763977051, "learning_rate": 4.588453186768549e-05, "loss": 0.662, "step": 9330 }, { "epoch": 0.935543647017579, "grad_norm": 2.4956793785095215, "learning_rate": 4.587586230981327e-05, "loss": 0.6311, "step": 9340 }, { "epoch": 0.9365452997445786, "grad_norm": 2.4352951049804688, "learning_rate": 4.5867184450820746e-05, "loss": 0.6221, "step": 9350 }, { "epoch": 0.9375469524715782, "grad_norm": 2.322591543197632, "learning_rate": 4.5858498294158594e-05, "loss": 0.6063, "step": 9360 }, { "epoch": 0.9385486051985776, "grad_norm": 3.020512819290161, "learning_rate": 4.584980384328078e-05, "loss": 0.6755, "step": 9370 }, { "epoch": 0.9395502579255772, "grad_norm": 2.604862928390503, "learning_rate": 4.5841101101644604e-05, "loss": 0.6647, "step": 9380 }, { "epoch": 0.9405519106525767, "grad_norm": 3.1676621437072754, "learning_rate": 4.5832390072710634e-05, "loss": 0.6585, "step": 9390 }, { "epoch": 0.9415535633795763, "grad_norm": 2.4377002716064453, "learning_rate": 4.582367075994274e-05, "loss": 0.648, "step": 9400 }, { "epoch": 0.9425552161065759, "grad_norm": 2.3394460678100586, "learning_rate": 4.581494316680809e-05, "loss": 0.5972, "step": 9410 }, { "epoch": 0.9435568688335754, "grad_norm": 2.6361212730407715, "learning_rate": 4.580620729677714e-05, "loss": 0.6573, "step": 9420 }, { "epoch": 0.944558521560575, "grad_norm": 3.2919678688049316, "learning_rate": 4.5797463153323625e-05, "loss": 0.6253, "step": 9430 }, { "epoch": 0.9455601742875746, "grad_norm": 2.6798956394195557, "learning_rate": 4.578871073992461e-05, "loss": 0.6699, "step": 9440 }, { "epoch": 0.946561827014574, "grad_norm": 3.6646926403045654, "learning_rate": 4.577995006006042e-05, "loss": 0.6509, "step": 9450 }, { "epoch": 0.9475634797415736, "grad_norm": 2.593172788619995, "learning_rate": 4.577118111721464e-05, "loss": 0.6777, "step": 9460 }, { "epoch": 0.9485651324685731, "grad_norm": 2.2215142250061035, "learning_rate": 4.576240391487421e-05, "loss": 0.6076, "step": 9470 }, { "epoch": 0.9495667851955727, "grad_norm": 2.540949583053589, "learning_rate": 4.575361845652928e-05, "loss": 0.6286, "step": 9480 }, { "epoch": 0.9505684379225723, "grad_norm": 2.5423927307128906, "learning_rate": 4.574482474567334e-05, "loss": 0.607, "step": 9490 }, { "epoch": 0.9515700906495718, "grad_norm": 2.4569149017333984, "learning_rate": 4.573602278580313e-05, "loss": 0.662, "step": 9500 }, { "epoch": 0.9525717433765714, "grad_norm": 2.495696783065796, "learning_rate": 4.572721258041868e-05, "loss": 0.5999, "step": 9510 }, { "epoch": 0.953573396103571, "grad_norm": 2.62209153175354, "learning_rate": 4.57183941330233e-05, "loss": 0.6722, "step": 9520 }, { "epoch": 0.9545750488305704, "grad_norm": 2.378762722015381, "learning_rate": 4.5709567447123577e-05, "loss": 0.6478, "step": 9530 }, { "epoch": 0.95557670155757, "grad_norm": 4.010514259338379, "learning_rate": 4.5700732526229364e-05, "loss": 0.6729, "step": 9540 }, { "epoch": 0.9565783542845695, "grad_norm": 2.489445447921753, "learning_rate": 4.5691889373853806e-05, "loss": 0.6039, "step": 9550 }, { "epoch": 0.9575800070115691, "grad_norm": 2.4253251552581787, "learning_rate": 4.56830379935133e-05, "loss": 0.5937, "step": 9560 }, { "epoch": 0.9585816597385687, "grad_norm": 2.035104513168335, "learning_rate": 4.567417838872754e-05, "loss": 0.6291, "step": 9570 }, { "epoch": 0.9595833124655682, "grad_norm": 2.7530062198638916, "learning_rate": 4.566531056301948e-05, "loss": 0.6301, "step": 9580 }, { "epoch": 0.9605849651925678, "grad_norm": 2.7969632148742676, "learning_rate": 4.565643451991533e-05, "loss": 0.6635, "step": 9590 }, { "epoch": 0.9615866179195672, "grad_norm": 3.570622444152832, "learning_rate": 4.564755026294457e-05, "loss": 0.6416, "step": 9600 }, { "epoch": 0.9625882706465668, "grad_norm": 2.4569382667541504, "learning_rate": 4.563865779563997e-05, "loss": 0.6606, "step": 9610 }, { "epoch": 0.9635899233735664, "grad_norm": 2.543860912322998, "learning_rate": 4.562975712153754e-05, "loss": 0.6704, "step": 9620 }, { "epoch": 0.9645915761005659, "grad_norm": 1.9001827239990234, "learning_rate": 4.562084824417657e-05, "loss": 0.6436, "step": 9630 }, { "epoch": 0.9655932288275655, "grad_norm": 3.1932127475738525, "learning_rate": 4.56119311670996e-05, "loss": 0.5471, "step": 9640 }, { "epoch": 0.9665948815545651, "grad_norm": 2.821469306945801, "learning_rate": 4.560300589385243e-05, "loss": 0.6969, "step": 9650 }, { "epoch": 0.9675965342815646, "grad_norm": 2.833065986633301, "learning_rate": 4.559407242798413e-05, "loss": 0.6397, "step": 9660 }, { "epoch": 0.9685981870085641, "grad_norm": 2.193622350692749, "learning_rate": 4.558513077304703e-05, "loss": 0.5948, "step": 9670 }, { "epoch": 0.9695998397355636, "grad_norm": 2.1319234371185303, "learning_rate": 4.557618093259668e-05, "loss": 0.6201, "step": 9680 }, { "epoch": 0.9706014924625632, "grad_norm": 2.4496283531188965, "learning_rate": 4.5567222910191945e-05, "loss": 0.6134, "step": 9690 }, { "epoch": 0.9716031451895628, "grad_norm": 2.4668614864349365, "learning_rate": 4.5558256709394884e-05, "loss": 0.665, "step": 9700 }, { "epoch": 0.9726047979165623, "grad_norm": 2.3747925758361816, "learning_rate": 4.554928233377086e-05, "loss": 0.6614, "step": 9710 }, { "epoch": 0.9736064506435619, "grad_norm": 2.282470941543579, "learning_rate": 4.5540299786888443e-05, "loss": 0.6946, "step": 9720 }, { "epoch": 0.9746081033705615, "grad_norm": 2.3198509216308594, "learning_rate": 4.553130907231947e-05, "loss": 0.7002, "step": 9730 }, { "epoch": 0.975609756097561, "grad_norm": 2.980468511581421, "learning_rate": 4.552231019363904e-05, "loss": 0.6124, "step": 9740 }, { "epoch": 0.9766114088245605, "grad_norm": 2.8307383060455322, "learning_rate": 4.551330315442549e-05, "loss": 0.6939, "step": 9750 }, { "epoch": 0.97761306155156, "grad_norm": 2.3871123790740967, "learning_rate": 4.5504287958260376e-05, "loss": 0.6207, "step": 9760 }, { "epoch": 0.9786147142785596, "grad_norm": 2.3071370124816895, "learning_rate": 4.5495264608728535e-05, "loss": 0.6086, "step": 9770 }, { "epoch": 0.9796163670055592, "grad_norm": 2.3521180152893066, "learning_rate": 4.548623310941802e-05, "loss": 0.7043, "step": 9780 }, { "epoch": 0.9806180197325587, "grad_norm": 2.123138904571533, "learning_rate": 4.547719346392015e-05, "loss": 0.6656, "step": 9790 }, { "epoch": 0.9816196724595583, "grad_norm": 2.3112244606018066, "learning_rate": 4.546814567582946e-05, "loss": 0.6143, "step": 9800 }, { "epoch": 0.9826213251865579, "grad_norm": 4.398324966430664, "learning_rate": 4.545908974874371e-05, "loss": 0.6633, "step": 9810 }, { "epoch": 0.9836229779135574, "grad_norm": 3.7052175998687744, "learning_rate": 4.5450025686263955e-05, "loss": 0.6285, "step": 9820 }, { "epoch": 0.9846246306405569, "grad_norm": 2.8410427570343018, "learning_rate": 4.544095349199442e-05, "loss": 0.7186, "step": 9830 }, { "epoch": 0.9856262833675564, "grad_norm": 2.3758535385131836, "learning_rate": 4.5431873169542596e-05, "loss": 0.6215, "step": 9840 }, { "epoch": 0.986627936094556, "grad_norm": 2.0713741779327393, "learning_rate": 4.542278472251921e-05, "loss": 0.6362, "step": 9850 }, { "epoch": 0.9876295888215556, "grad_norm": 2.075610637664795, "learning_rate": 4.54136881545382e-05, "loss": 0.5963, "step": 9860 }, { "epoch": 0.9886312415485551, "grad_norm": 2.8744397163391113, "learning_rate": 4.5404583469216756e-05, "loss": 0.6866, "step": 9870 }, { "epoch": 0.9896328942755547, "grad_norm": 2.514857769012451, "learning_rate": 4.539547067017528e-05, "loss": 0.6553, "step": 9880 }, { "epoch": 0.9906345470025543, "grad_norm": 2.687924385070801, "learning_rate": 4.538634976103738e-05, "loss": 0.6553, "step": 9890 }, { "epoch": 0.9916361997295537, "grad_norm": 3.2299506664276123, "learning_rate": 4.5377220745429954e-05, "loss": 0.6194, "step": 9900 }, { "epoch": 0.9926378524565533, "grad_norm": 2.1460533142089844, "learning_rate": 4.536808362698305e-05, "loss": 0.6697, "step": 9910 }, { "epoch": 0.9936395051835528, "grad_norm": 2.8532016277313232, "learning_rate": 4.535893840932999e-05, "loss": 0.6531, "step": 9920 }, { "epoch": 0.9946411579105524, "grad_norm": 2.4520788192749023, "learning_rate": 4.5349785096107275e-05, "loss": 0.6417, "step": 9930 }, { "epoch": 0.995642810637552, "grad_norm": 2.3643345832824707, "learning_rate": 4.534062369095467e-05, "loss": 0.6163, "step": 9940 }, { "epoch": 0.9966444633645515, "grad_norm": 2.5428478717803955, "learning_rate": 4.5331454197515126e-05, "loss": 0.6432, "step": 9950 }, { "epoch": 0.9976461160915511, "grad_norm": 2.155560255050659, "learning_rate": 4.5322276619434814e-05, "loss": 0.5903, "step": 9960 }, { "epoch": 0.9986477688185507, "grad_norm": 2.393480062484741, "learning_rate": 4.531309096036313e-05, "loss": 0.6368, "step": 9970 }, { "epoch": 0.9996494215455501, "grad_norm": 2.6624627113342285, "learning_rate": 4.530389722395268e-05, "loss": 0.5995, "step": 9980 }, { "epoch": 1.0006009916361998, "grad_norm": 2.460616111755371, "learning_rate": 4.529469541385928e-05, "loss": 0.6235, "step": 9990 }, { "epoch": 1.0016026443631993, "grad_norm": 2.41973614692688, "learning_rate": 4.5285485533741946e-05, "loss": 0.6751, "step": 10000 }, { "epoch": 1.0016026443631993, "eval_bleu": 0.08263337231259318, "eval_loss": 0.6337834000587463, "eval_rouge1": 0.4374259000948164, "eval_rouge2": 0.32409193917532053, "eval_rougeL": 0.41067563698366927, "eval_runtime": 106969.7121, "eval_samples_per_second": 0.166, "eval_steps_per_second": 0.021, "eval_wer": 4.417791876035146, "step": 10000 }, { "epoch": 1.002604297090199, "grad_norm": 2.241828680038452, "learning_rate": 4.527626758726292e-05, "loss": 0.5698, "step": 10010 }, { "epoch": 1.0036059498171983, "grad_norm": 2.0964417457580566, "learning_rate": 4.526704157808765e-05, "loss": 0.6447, "step": 10020 }, { "epoch": 1.004607602544198, "grad_norm": 2.1923632621765137, "learning_rate": 4.525780750988479e-05, "loss": 0.5937, "step": 10030 }, { "epoch": 1.0056092552711975, "grad_norm": 2.193221092224121, "learning_rate": 4.524856538632617e-05, "loss": 0.5717, "step": 10040 }, { "epoch": 1.006610907998197, "grad_norm": 1.8794662952423096, "learning_rate": 4.5239315211086865e-05, "loss": 0.574, "step": 10050 }, { "epoch": 1.0076125607251967, "grad_norm": 2.7534232139587402, "learning_rate": 4.523005698784514e-05, "loss": 0.607, "step": 10060 }, { "epoch": 1.008614213452196, "grad_norm": 1.9754960536956787, "learning_rate": 4.522079072028243e-05, "loss": 0.5808, "step": 10070 }, { "epoch": 1.0096158661791956, "grad_norm": 2.343451499938965, "learning_rate": 4.5211516412083416e-05, "loss": 0.6064, "step": 10080 }, { "epoch": 1.0106175189061952, "grad_norm": 2.7020604610443115, "learning_rate": 4.520223406693594e-05, "loss": 0.6037, "step": 10090 }, { "epoch": 1.0116191716331948, "grad_norm": 2.809910774230957, "learning_rate": 4.5192943688531056e-05, "loss": 0.5117, "step": 10100 }, { "epoch": 1.0126208243601944, "grad_norm": 2.667957305908203, "learning_rate": 4.5183645280563e-05, "loss": 0.583, "step": 10110 }, { "epoch": 1.0136224770871938, "grad_norm": 2.3873534202575684, "learning_rate": 4.517433884672924e-05, "loss": 0.6762, "step": 10120 }, { "epoch": 1.0146241298141934, "grad_norm": 2.1108696460723877, "learning_rate": 4.516502439073037e-05, "loss": 0.556, "step": 10130 }, { "epoch": 1.015625782541193, "grad_norm": 2.5046160221099854, "learning_rate": 4.5155701916270215e-05, "loss": 0.5785, "step": 10140 }, { "epoch": 1.0166274352681925, "grad_norm": 2.8896470069885254, "learning_rate": 4.5146371427055803e-05, "loss": 0.5929, "step": 10150 }, { "epoch": 1.0176290879951921, "grad_norm": 1.979096531867981, "learning_rate": 4.513703292679731e-05, "loss": 0.6493, "step": 10160 }, { "epoch": 1.0186307407221915, "grad_norm": 2.9013965129852295, "learning_rate": 4.5127686419208125e-05, "loss": 0.5868, "step": 10170 }, { "epoch": 1.019632393449191, "grad_norm": 1.9182281494140625, "learning_rate": 4.511833190800481e-05, "loss": 0.5256, "step": 10180 }, { "epoch": 1.0206340461761907, "grad_norm": 3.2245006561279297, "learning_rate": 4.510896939690711e-05, "loss": 0.6408, "step": 10190 }, { "epoch": 1.0216356989031903, "grad_norm": 2.499907970428467, "learning_rate": 4.509959888963795e-05, "loss": 0.5626, "step": 10200 }, { "epoch": 1.0226373516301899, "grad_norm": 1.939091682434082, "learning_rate": 4.509022038992345e-05, "loss": 0.5804, "step": 10210 }, { "epoch": 1.0236390043571895, "grad_norm": 2.0463554859161377, "learning_rate": 4.5080833901492884e-05, "loss": 0.549, "step": 10220 }, { "epoch": 1.0246406570841888, "grad_norm": 2.2921488285064697, "learning_rate": 4.507143942807872e-05, "loss": 0.6272, "step": 10230 }, { "epoch": 1.0256423098111884, "grad_norm": 2.6805219650268555, "learning_rate": 4.506203697341659e-05, "loss": 0.6537, "step": 10240 }, { "epoch": 1.026643962538188, "grad_norm": 2.7690038681030273, "learning_rate": 4.505262654124531e-05, "loss": 0.6039, "step": 10250 }, { "epoch": 1.0276456152651876, "grad_norm": 2.6545157432556152, "learning_rate": 4.504320813530687e-05, "loss": 0.554, "step": 10260 }, { "epoch": 1.0286472679921872, "grad_norm": 2.342416763305664, "learning_rate": 4.5033781759346406e-05, "loss": 0.6261, "step": 10270 }, { "epoch": 1.0296489207191866, "grad_norm": 2.439129590988159, "learning_rate": 4.502434741711226e-05, "loss": 0.6274, "step": 10280 }, { "epoch": 1.0306505734461862, "grad_norm": 3.0521368980407715, "learning_rate": 4.501490511235591e-05, "loss": 0.6079, "step": 10290 }, { "epoch": 1.0316522261731857, "grad_norm": 2.864180326461792, "learning_rate": 4.5005454848832014e-05, "loss": 0.6524, "step": 10300 }, { "epoch": 1.0326538789001853, "grad_norm": 2.0836775302886963, "learning_rate": 4.4995996630298395e-05, "loss": 0.5631, "step": 10310 }, { "epoch": 1.033655531627185, "grad_norm": 1.885198712348938, "learning_rate": 4.4986530460516054e-05, "loss": 0.5759, "step": 10320 }, { "epoch": 1.0346571843541843, "grad_norm": 1.9586544036865234, "learning_rate": 4.497705634324912e-05, "loss": 0.5971, "step": 10330 }, { "epoch": 1.035658837081184, "grad_norm": 2.376250982284546, "learning_rate": 4.49675742822649e-05, "loss": 0.6532, "step": 10340 }, { "epoch": 1.0366604898081835, "grad_norm": 2.651041269302368, "learning_rate": 4.495808428133387e-05, "loss": 0.658, "step": 10350 }, { "epoch": 1.037662142535183, "grad_norm": 2.460192918777466, "learning_rate": 4.494858634422965e-05, "loss": 0.5724, "step": 10360 }, { "epoch": 1.0386637952621827, "grad_norm": 2.251406669616699, "learning_rate": 4.493908047472901e-05, "loss": 0.5986, "step": 10370 }, { "epoch": 1.0396654479891823, "grad_norm": 2.525148868560791, "learning_rate": 4.49295666766119e-05, "loss": 0.5119, "step": 10380 }, { "epoch": 1.0406671007161816, "grad_norm": 2.9440059661865234, "learning_rate": 4.492004495366139e-05, "loss": 0.6081, "step": 10390 }, { "epoch": 1.0416687534431812, "grad_norm": 2.5194528102874756, "learning_rate": 4.491051530966372e-05, "loss": 0.5246, "step": 10400 }, { "epoch": 1.0426704061701808, "grad_norm": 2.512968063354492, "learning_rate": 4.490097774840829e-05, "loss": 0.6149, "step": 10410 }, { "epoch": 1.0436720588971804, "grad_norm": 3.91227650642395, "learning_rate": 4.489143227368763e-05, "loss": 0.6073, "step": 10420 }, { "epoch": 1.04467371162418, "grad_norm": 2.4288504123687744, "learning_rate": 4.488187888929741e-05, "loss": 0.6019, "step": 10430 }, { "epoch": 1.0456753643511794, "grad_norm": 3.138383150100708, "learning_rate": 4.487231759903647e-05, "loss": 0.6624, "step": 10440 }, { "epoch": 1.046677017078179, "grad_norm": 2.4413421154022217, "learning_rate": 4.486274840670677e-05, "loss": 0.5642, "step": 10450 }, { "epoch": 1.0476786698051785, "grad_norm": 2.7690060138702393, "learning_rate": 4.485317131611344e-05, "loss": 0.6147, "step": 10460 }, { "epoch": 1.0486803225321781, "grad_norm": 2.3034732341766357, "learning_rate": 4.484358633106471e-05, "loss": 0.5251, "step": 10470 }, { "epoch": 1.0496819752591777, "grad_norm": 2.441673994064331, "learning_rate": 4.483399345537199e-05, "loss": 0.551, "step": 10480 }, { "epoch": 1.050683627986177, "grad_norm": 2.078007936477661, "learning_rate": 4.482439269284981e-05, "loss": 0.6121, "step": 10490 }, { "epoch": 1.0516852807131767, "grad_norm": 2.7018327713012695, "learning_rate": 4.4814784047315836e-05, "loss": 0.7006, "step": 10500 }, { "epoch": 1.0526869334401763, "grad_norm": 2.6600396633148193, "learning_rate": 4.480516752259086e-05, "loss": 0.5935, "step": 10510 }, { "epoch": 1.0536885861671759, "grad_norm": 2.5032167434692383, "learning_rate": 4.4795543122498826e-05, "loss": 0.6069, "step": 10520 }, { "epoch": 1.0546902388941755, "grad_norm": 2.078220844268799, "learning_rate": 4.478591085086681e-05, "loss": 0.5766, "step": 10530 }, { "epoch": 1.055691891621175, "grad_norm": 2.401156187057495, "learning_rate": 4.477627071152498e-05, "loss": 0.6457, "step": 10540 }, { "epoch": 1.0566935443481744, "grad_norm": 2.45200514793396, "learning_rate": 4.476662270830668e-05, "loss": 0.6415, "step": 10550 }, { "epoch": 1.057695197075174, "grad_norm": 2.3690593242645264, "learning_rate": 4.4756966845048355e-05, "loss": 0.5754, "step": 10560 }, { "epoch": 1.0586968498021736, "grad_norm": 1.8617242574691772, "learning_rate": 4.474730312558959e-05, "loss": 0.5331, "step": 10570 }, { "epoch": 1.0596985025291732, "grad_norm": 2.688246250152588, "learning_rate": 4.473763155377307e-05, "loss": 0.5913, "step": 10580 }, { "epoch": 1.0607001552561728, "grad_norm": 2.441681146621704, "learning_rate": 4.472795213344464e-05, "loss": 0.6329, "step": 10590 }, { "epoch": 1.0617018079831722, "grad_norm": 1.5429996252059937, "learning_rate": 4.471826486845323e-05, "loss": 0.5835, "step": 10600 }, { "epoch": 1.0627034607101717, "grad_norm": 2.474956512451172, "learning_rate": 4.4708569762650904e-05, "loss": 0.6397, "step": 10610 }, { "epoch": 1.0637051134371713, "grad_norm": 2.1213226318359375, "learning_rate": 4.4698866819892846e-05, "loss": 0.6741, "step": 10620 }, { "epoch": 1.064706766164171, "grad_norm": 2.361389636993408, "learning_rate": 4.4689156044037363e-05, "loss": 0.6327, "step": 10630 }, { "epoch": 1.0657084188911705, "grad_norm": 2.966655731201172, "learning_rate": 4.4679437438945855e-05, "loss": 0.6742, "step": 10640 }, { "epoch": 1.0667100716181699, "grad_norm": 2.1844279766082764, "learning_rate": 4.466971100848285e-05, "loss": 0.6256, "step": 10650 }, { "epoch": 1.0677117243451695, "grad_norm": 2.297964334487915, "learning_rate": 4.465997675651599e-05, "loss": 0.6274, "step": 10660 }, { "epoch": 1.068713377072169, "grad_norm": 2.126434087753296, "learning_rate": 4.465023468691601e-05, "loss": 0.6694, "step": 10670 }, { "epoch": 1.0697150297991687, "grad_norm": 2.612492799758911, "learning_rate": 4.464048480355679e-05, "loss": 0.5821, "step": 10680 }, { "epoch": 1.0707166825261683, "grad_norm": 2.2019944190979004, "learning_rate": 4.463072711031529e-05, "loss": 0.6195, "step": 10690 }, { "epoch": 1.0717183352531676, "grad_norm": 2.5340359210968018, "learning_rate": 4.462096161107154e-05, "loss": 0.67, "step": 10700 }, { "epoch": 1.0727199879801672, "grad_norm": 2.8773465156555176, "learning_rate": 4.4611188309708766e-05, "loss": 0.5325, "step": 10710 }, { "epoch": 1.0737216407071668, "grad_norm": 2.5338494777679443, "learning_rate": 4.460140721011321e-05, "loss": 0.674, "step": 10720 }, { "epoch": 1.0747232934341664, "grad_norm": 2.4730725288391113, "learning_rate": 4.459161831617426e-05, "loss": 0.5787, "step": 10730 }, { "epoch": 1.075724946161166, "grad_norm": 1.9959347248077393, "learning_rate": 4.458182163178439e-05, "loss": 0.5417, "step": 10740 }, { "epoch": 1.0767265988881656, "grad_norm": 2.4486429691314697, "learning_rate": 4.4572017160839176e-05, "loss": 0.6154, "step": 10750 }, { "epoch": 1.077728251615165, "grad_norm": 2.2543139457702637, "learning_rate": 4.4562204907237274e-05, "loss": 0.5951, "step": 10760 }, { "epoch": 1.0787299043421645, "grad_norm": 4.040517807006836, "learning_rate": 4.455238487488047e-05, "loss": 0.5494, "step": 10770 }, { "epoch": 1.0797315570691641, "grad_norm": 2.089376926422119, "learning_rate": 4.454255706767361e-05, "loss": 0.5377, "step": 10780 }, { "epoch": 1.0807332097961637, "grad_norm": 2.3366119861602783, "learning_rate": 4.453272148952464e-05, "loss": 0.6139, "step": 10790 }, { "epoch": 1.0817348625231633, "grad_norm": 2.543785333633423, "learning_rate": 4.4522878144344606e-05, "loss": 0.5641, "step": 10800 }, { "epoch": 1.0827365152501627, "grad_norm": 2.731062173843384, "learning_rate": 4.451302703604763e-05, "loss": 0.6759, "step": 10810 }, { "epoch": 1.0837381679771623, "grad_norm": 2.7335755825042725, "learning_rate": 4.4503168168550934e-05, "loss": 0.6713, "step": 10820 }, { "epoch": 1.0847398207041619, "grad_norm": 2.4380624294281006, "learning_rate": 4.449330154577481e-05, "loss": 0.6745, "step": 10830 }, { "epoch": 1.0857414734311615, "grad_norm": 2.1496427059173584, "learning_rate": 4.4483427171642643e-05, "loss": 0.6305, "step": 10840 }, { "epoch": 1.086743126158161, "grad_norm": 2.2780048847198486, "learning_rate": 4.4473545050080915e-05, "loss": 0.5717, "step": 10850 }, { "epoch": 1.0877447788851604, "grad_norm": 2.7438337802886963, "learning_rate": 4.446365518501915e-05, "loss": 0.5682, "step": 10860 }, { "epoch": 1.08874643161216, "grad_norm": 2.0671546459198, "learning_rate": 4.4453757580389984e-05, "loss": 0.6222, "step": 10870 }, { "epoch": 1.0897480843391596, "grad_norm": 2.4361698627471924, "learning_rate": 4.444385224012912e-05, "loss": 0.5047, "step": 10880 }, { "epoch": 1.0907497370661592, "grad_norm": 2.4023778438568115, "learning_rate": 4.443393916817535e-05, "loss": 0.5796, "step": 10890 }, { "epoch": 1.0917513897931588, "grad_norm": 2.4326682090759277, "learning_rate": 4.442401836847051e-05, "loss": 0.5218, "step": 10900 }, { "epoch": 1.0927530425201581, "grad_norm": 1.928910493850708, "learning_rate": 4.441408984495953e-05, "loss": 0.5586, "step": 10910 }, { "epoch": 1.0937546952471577, "grad_norm": 2.215165138244629, "learning_rate": 4.4404153601590415e-05, "loss": 0.5602, "step": 10920 }, { "epoch": 1.0947563479741573, "grad_norm": 2.9248368740081787, "learning_rate": 4.439420964231422e-05, "loss": 0.6545, "step": 10930 }, { "epoch": 1.095758000701157, "grad_norm": 2.902667999267578, "learning_rate": 4.43842579710851e-05, "loss": 0.5476, "step": 10940 }, { "epoch": 1.0967596534281565, "grad_norm": 2.431466579437256, "learning_rate": 4.437429859186025e-05, "loss": 0.5683, "step": 10950 }, { "epoch": 1.097761306155156, "grad_norm": 1.8508896827697754, "learning_rate": 4.436433150859993e-05, "loss": 0.5298, "step": 10960 }, { "epoch": 1.0987629588821555, "grad_norm": 2.972052812576294, "learning_rate": 4.4354356725267485e-05, "loss": 0.6163, "step": 10970 }, { "epoch": 1.099764611609155, "grad_norm": 3.3024284839630127, "learning_rate": 4.4344374245829294e-05, "loss": 0.5949, "step": 10980 }, { "epoch": 1.1007662643361547, "grad_norm": 2.4315567016601562, "learning_rate": 4.433438407425482e-05, "loss": 0.5789, "step": 10990 }, { "epoch": 1.1017679170631542, "grad_norm": 2.4305036067962646, "learning_rate": 4.4324386214516576e-05, "loss": 0.5941, "step": 11000 }, { "epoch": 1.1027695697901538, "grad_norm": 2.330115795135498, "learning_rate": 4.4314380670590125e-05, "loss": 0.6021, "step": 11010 }, { "epoch": 1.1037712225171532, "grad_norm": 2.429572582244873, "learning_rate": 4.4304367446454084e-05, "loss": 0.5992, "step": 11020 }, { "epoch": 1.1047728752441528, "grad_norm": 2.6118149757385254, "learning_rate": 4.429434654609016e-05, "loss": 0.5482, "step": 11030 }, { "epoch": 1.1057745279711524, "grad_norm": 2.079375982284546, "learning_rate": 4.428431797348306e-05, "loss": 0.6004, "step": 11040 }, { "epoch": 1.106776180698152, "grad_norm": 2.0178675651550293, "learning_rate": 4.4274281732620574e-05, "loss": 0.5709, "step": 11050 }, { "epoch": 1.1077778334251516, "grad_norm": 2.3021092414855957, "learning_rate": 4.426423782749352e-05, "loss": 0.5602, "step": 11060 }, { "epoch": 1.1087794861521512, "grad_norm": 2.4679481983184814, "learning_rate": 4.42541862620958e-05, "loss": 0.6025, "step": 11070 }, { "epoch": 1.1097811388791505, "grad_norm": 2.0906708240509033, "learning_rate": 4.424412704042432e-05, "loss": 0.5415, "step": 11080 }, { "epoch": 1.1107827916061501, "grad_norm": 2.103607416152954, "learning_rate": 4.423406016647906e-05, "loss": 0.5777, "step": 11090 }, { "epoch": 1.1117844443331497, "grad_norm": 2.2250356674194336, "learning_rate": 4.422398564426303e-05, "loss": 0.5818, "step": 11100 }, { "epoch": 1.1127860970601493, "grad_norm": 2.02903151512146, "learning_rate": 4.421390347778228e-05, "loss": 0.6417, "step": 11110 }, { "epoch": 1.113787749787149, "grad_norm": 2.283987283706665, "learning_rate": 4.420381367104591e-05, "loss": 0.603, "step": 11120 }, { "epoch": 1.1147894025141483, "grad_norm": 2.4817943572998047, "learning_rate": 4.419371622806604e-05, "loss": 0.5143, "step": 11130 }, { "epoch": 1.1157910552411479, "grad_norm": 1.9237391948699951, "learning_rate": 4.4183611152857854e-05, "loss": 0.5663, "step": 11140 }, { "epoch": 1.1167927079681474, "grad_norm": 2.5406248569488525, "learning_rate": 4.417349844943953e-05, "loss": 0.564, "step": 11150 }, { "epoch": 1.117794360695147, "grad_norm": 2.506094217300415, "learning_rate": 4.416337812183233e-05, "loss": 0.5951, "step": 11160 }, { "epoch": 1.1187960134221466, "grad_norm": 2.317397117614746, "learning_rate": 4.415325017406051e-05, "loss": 0.5939, "step": 11170 }, { "epoch": 1.119797666149146, "grad_norm": 2.1029767990112305, "learning_rate": 4.4143114610151374e-05, "loss": 0.5199, "step": 11180 }, { "epoch": 1.1207993188761456, "grad_norm": 2.195216417312622, "learning_rate": 4.413297143413523e-05, "loss": 0.6576, "step": 11190 }, { "epoch": 1.1218009716031452, "grad_norm": 2.5397441387176514, "learning_rate": 4.412282065004546e-05, "loss": 0.5166, "step": 11200 }, { "epoch": 1.1228026243301448, "grad_norm": 2.8908276557922363, "learning_rate": 4.4112662261918415e-05, "loss": 0.6163, "step": 11210 }, { "epoch": 1.1238042770571444, "grad_norm": 2.3265693187713623, "learning_rate": 4.41024962737935e-05, "loss": 0.5597, "step": 11220 }, { "epoch": 1.1248059297841437, "grad_norm": 1.9278279542922974, "learning_rate": 4.4092322689713164e-05, "loss": 0.6404, "step": 11230 }, { "epoch": 1.1258075825111433, "grad_norm": 2.3198344707489014, "learning_rate": 4.408214151372283e-05, "loss": 0.6297, "step": 11240 }, { "epoch": 1.126809235238143, "grad_norm": 2.27579402923584, "learning_rate": 4.407195274987096e-05, "loss": 0.5715, "step": 11250 }, { "epoch": 1.1278108879651425, "grad_norm": 2.439666748046875, "learning_rate": 4.4061756402209047e-05, "loss": 0.6156, "step": 11260 }, { "epoch": 1.128812540692142, "grad_norm": 2.574446201324463, "learning_rate": 4.4051552474791585e-05, "loss": 0.6065, "step": 11270 }, { "epoch": 1.1298141934191417, "grad_norm": 2.493682384490967, "learning_rate": 4.404134097167608e-05, "loss": 0.517, "step": 11280 }, { "epoch": 1.130815846146141, "grad_norm": 2.36503529548645, "learning_rate": 4.403112189692305e-05, "loss": 0.5445, "step": 11290 }, { "epoch": 1.1318174988731406, "grad_norm": 2.6429598331451416, "learning_rate": 4.402089525459604e-05, "loss": 0.5817, "step": 11300 }, { "epoch": 1.1328191516001402, "grad_norm": 2.0238144397735596, "learning_rate": 4.401066104876158e-05, "loss": 0.6503, "step": 11310 }, { "epoch": 1.1338208043271398, "grad_norm": 1.9457943439483643, "learning_rate": 4.4000419283489234e-05, "loss": 0.5834, "step": 11320 }, { "epoch": 1.1348224570541394, "grad_norm": 3.049051523208618, "learning_rate": 4.3990169962851556e-05, "loss": 0.5707, "step": 11330 }, { "epoch": 1.1358241097811388, "grad_norm": 2.3645284175872803, "learning_rate": 4.39799130909241e-05, "loss": 0.6106, "step": 11340 }, { "epoch": 1.1368257625081384, "grad_norm": 2.4572527408599854, "learning_rate": 4.3969648671785427e-05, "loss": 0.576, "step": 11350 }, { "epoch": 1.137827415235138, "grad_norm": 2.7629027366638184, "learning_rate": 4.395937670951712e-05, "loss": 0.5461, "step": 11360 }, { "epoch": 1.1388290679621376, "grad_norm": 1.8488332033157349, "learning_rate": 4.3949097208203715e-05, "loss": 0.5847, "step": 11370 }, { "epoch": 1.1398307206891372, "grad_norm": 2.3692805767059326, "learning_rate": 4.3938810171932795e-05, "loss": 0.58, "step": 11380 }, { "epoch": 1.1408323734161367, "grad_norm": 2.6989333629608154, "learning_rate": 4.392851560479492e-05, "loss": 0.5461, "step": 11390 }, { "epoch": 1.1418340261431361, "grad_norm": 2.201425552368164, "learning_rate": 4.3918213510883624e-05, "loss": 0.5628, "step": 11400 }, { "epoch": 1.1428356788701357, "grad_norm": 2.2745614051818848, "learning_rate": 4.390790389429546e-05, "loss": 0.5372, "step": 11410 }, { "epoch": 1.1438373315971353, "grad_norm": 1.8337105512619019, "learning_rate": 4.3897586759129975e-05, "loss": 0.5933, "step": 11420 }, { "epoch": 1.144838984324135, "grad_norm": 2.181349992752075, "learning_rate": 4.388726210948969e-05, "loss": 0.6146, "step": 11430 }, { "epoch": 1.1458406370511343, "grad_norm": 2.9680917263031006, "learning_rate": 4.387692994948012e-05, "loss": 0.6383, "step": 11440 }, { "epoch": 1.1468422897781338, "grad_norm": 2.7716708183288574, "learning_rate": 4.386659028320975e-05, "loss": 0.6112, "step": 11450 }, { "epoch": 1.1478439425051334, "grad_norm": 2.357621908187866, "learning_rate": 4.385624311479009e-05, "loss": 0.5798, "step": 11460 }, { "epoch": 1.148845595232133, "grad_norm": 2.055521249771118, "learning_rate": 4.3845888448335596e-05, "loss": 0.5666, "step": 11470 }, { "epoch": 1.1498472479591326, "grad_norm": 2.8949215412139893, "learning_rate": 4.3835526287963726e-05, "loss": 0.5668, "step": 11480 }, { "epoch": 1.1508489006861322, "grad_norm": 2.6228830814361572, "learning_rate": 4.38251566377949e-05, "loss": 0.6357, "step": 11490 }, { "epoch": 1.1518505534131316, "grad_norm": 2.118854284286499, "learning_rate": 4.381477950195254e-05, "loss": 0.5745, "step": 11500 }, { "epoch": 1.1528522061401312, "grad_norm": 2.6163575649261475, "learning_rate": 4.380439488456301e-05, "loss": 0.5824, "step": 11510 }, { "epoch": 1.1538538588671308, "grad_norm": 2.5866751670837402, "learning_rate": 4.3794002789755705e-05, "loss": 0.6282, "step": 11520 }, { "epoch": 1.1548555115941304, "grad_norm": 1.9551018476486206, "learning_rate": 4.3783603221662925e-05, "loss": 0.5497, "step": 11530 }, { "epoch": 1.15585716432113, "grad_norm": 2.2837166786193848, "learning_rate": 4.3773196184419996e-05, "loss": 0.5256, "step": 11540 }, { "epoch": 1.1568588170481293, "grad_norm": 2.673213005065918, "learning_rate": 4.376278168216518e-05, "loss": 0.638, "step": 11550 }, { "epoch": 1.157860469775129, "grad_norm": 2.9592199325561523, "learning_rate": 4.375235971903973e-05, "loss": 0.5934, "step": 11560 }, { "epoch": 1.1588621225021285, "grad_norm": 2.324190855026245, "learning_rate": 4.374193029918786e-05, "loss": 0.5472, "step": 11570 }, { "epoch": 1.159863775229128, "grad_norm": 3.0013883113861084, "learning_rate": 4.3731493426756734e-05, "loss": 0.5461, "step": 11580 }, { "epoch": 1.1608654279561277, "grad_norm": 2.3880512714385986, "learning_rate": 4.372104910589649e-05, "loss": 0.6383, "step": 11590 }, { "epoch": 1.1618670806831273, "grad_norm": 2.222216844558716, "learning_rate": 4.371059734076024e-05, "loss": 0.5615, "step": 11600 }, { "epoch": 1.1628687334101266, "grad_norm": 2.4552252292633057, "learning_rate": 4.3700138135504044e-05, "loss": 0.5334, "step": 11610 }, { "epoch": 1.1638703861371262, "grad_norm": 2.393084764480591, "learning_rate": 4.3689671494286913e-05, "loss": 0.6087, "step": 11620 }, { "epoch": 1.1648720388641258, "grad_norm": 2.7061352729797363, "learning_rate": 4.367919742127083e-05, "loss": 0.5929, "step": 11630 }, { "epoch": 1.1658736915911254, "grad_norm": 2.2179784774780273, "learning_rate": 4.366871592062073e-05, "loss": 0.5751, "step": 11640 }, { "epoch": 1.1668753443181248, "grad_norm": 2.616370677947998, "learning_rate": 4.3658226996504494e-05, "loss": 0.6019, "step": 11650 }, { "epoch": 1.1678769970451244, "grad_norm": 2.4288666248321533, "learning_rate": 4.364773065309296e-05, "loss": 0.5673, "step": 11660 }, { "epoch": 1.168878649772124, "grad_norm": 2.0479538440704346, "learning_rate": 4.36372268945599e-05, "loss": 0.6108, "step": 11670 }, { "epoch": 1.1698803024991236, "grad_norm": 2.7070276737213135, "learning_rate": 4.362671572508207e-05, "loss": 0.5864, "step": 11680 }, { "epoch": 1.1708819552261232, "grad_norm": 3.1016364097595215, "learning_rate": 4.3616197148839155e-05, "loss": 0.5442, "step": 11690 }, { "epoch": 1.1718836079531227, "grad_norm": 2.680513620376587, "learning_rate": 4.360567117001377e-05, "loss": 0.5797, "step": 11700 }, { "epoch": 1.172885260680122, "grad_norm": 1.764623999595642, "learning_rate": 4.359513779279149e-05, "loss": 0.5252, "step": 11710 }, { "epoch": 1.1738869134071217, "grad_norm": 2.167689323425293, "learning_rate": 4.358459702136083e-05, "loss": 0.5704, "step": 11720 }, { "epoch": 1.1748885661341213, "grad_norm": 3.4920172691345215, "learning_rate": 4.3574048859913247e-05, "loss": 0.6426, "step": 11730 }, { "epoch": 1.1758902188611209, "grad_norm": 2.359330415725708, "learning_rate": 4.3563493312643125e-05, "loss": 0.6073, "step": 11740 }, { "epoch": 1.1768918715881205, "grad_norm": 2.428713798522949, "learning_rate": 4.3552930383747806e-05, "loss": 0.5856, "step": 11750 }, { "epoch": 1.1778935243151198, "grad_norm": 2.476402521133423, "learning_rate": 4.354236007742754e-05, "loss": 0.5963, "step": 11760 }, { "epoch": 1.1788951770421194, "grad_norm": 2.6675846576690674, "learning_rate": 4.3531782397885534e-05, "loss": 0.5157, "step": 11770 }, { "epoch": 1.179896829769119, "grad_norm": 1.9533215761184692, "learning_rate": 4.352119734932791e-05, "loss": 0.6073, "step": 11780 }, { "epoch": 1.1808984824961186, "grad_norm": 2.8943727016448975, "learning_rate": 4.351060493596375e-05, "loss": 0.6105, "step": 11790 }, { "epoch": 1.1819001352231182, "grad_norm": 3.005053758621216, "learning_rate": 4.350000516200501e-05, "loss": 0.557, "step": 11800 }, { "epoch": 1.1829017879501178, "grad_norm": 2.9034831523895264, "learning_rate": 4.348939803166664e-05, "loss": 0.5801, "step": 11810 }, { "epoch": 1.1839034406771172, "grad_norm": 2.1721954345703125, "learning_rate": 4.347878354916645e-05, "loss": 0.6232, "step": 11820 }, { "epoch": 1.1849050934041168, "grad_norm": 2.2449421882629395, "learning_rate": 4.346816171872522e-05, "loss": 0.5813, "step": 11830 }, { "epoch": 1.1859067461311164, "grad_norm": 2.2640445232391357, "learning_rate": 4.345753254456663e-05, "loss": 0.6179, "step": 11840 }, { "epoch": 1.186908398858116, "grad_norm": 1.8900947570800781, "learning_rate": 4.34468960309173e-05, "loss": 0.5509, "step": 11850 }, { "epoch": 1.1879100515851155, "grad_norm": 2.5752103328704834, "learning_rate": 4.343625218200674e-05, "loss": 0.6068, "step": 11860 }, { "epoch": 1.188911704312115, "grad_norm": 2.4647738933563232, "learning_rate": 4.342560100206739e-05, "loss": 0.5853, "step": 11870 }, { "epoch": 1.1899133570391145, "grad_norm": 2.0032401084899902, "learning_rate": 4.3414942495334634e-05, "loss": 0.5757, "step": 11880 }, { "epoch": 1.190915009766114, "grad_norm": 2.42807674407959, "learning_rate": 4.340427666604671e-05, "loss": 0.6069, "step": 11890 }, { "epoch": 1.1919166624931137, "grad_norm": 2.145582914352417, "learning_rate": 4.3393603518444803e-05, "loss": 0.5653, "step": 11900 }, { "epoch": 1.1929183152201133, "grad_norm": 2.337334632873535, "learning_rate": 4.338292305677303e-05, "loss": 0.5728, "step": 11910 }, { "epoch": 1.1939199679471129, "grad_norm": 2.395955801010132, "learning_rate": 4.337223528527836e-05, "loss": 0.5418, "step": 11920 }, { "epoch": 1.1949216206741122, "grad_norm": 2.5528903007507324, "learning_rate": 4.3361540208210725e-05, "loss": 0.5833, "step": 11930 }, { "epoch": 1.1959232734011118, "grad_norm": 1.9089552164077759, "learning_rate": 4.335083782982293e-05, "loss": 0.556, "step": 11940 }, { "epoch": 1.1969249261281114, "grad_norm": 2.391150951385498, "learning_rate": 4.334012815437069e-05, "loss": 0.6625, "step": 11950 }, { "epoch": 1.197926578855111, "grad_norm": 2.3361966609954834, "learning_rate": 4.3329411186112616e-05, "loss": 0.5594, "step": 11960 }, { "epoch": 1.1989282315821104, "grad_norm": 1.8724137544631958, "learning_rate": 4.3318686929310235e-05, "loss": 0.5774, "step": 11970 }, { "epoch": 1.19992988430911, "grad_norm": 2.7264559268951416, "learning_rate": 4.330795538822795e-05, "loss": 0.5614, "step": 11980 }, { "epoch": 1.2009315370361096, "grad_norm": 2.320357322692871, "learning_rate": 4.3297216567133085e-05, "loss": 0.5901, "step": 11990 }, { "epoch": 1.2019331897631091, "grad_norm": 2.6622066497802734, "learning_rate": 4.328647047029584e-05, "loss": 0.6711, "step": 12000 }, { "epoch": 1.2029348424901087, "grad_norm": 2.5785880088806152, "learning_rate": 4.3275717101989316e-05, "loss": 0.6132, "step": 12010 }, { "epoch": 1.2039364952171083, "grad_norm": 2.7682085037231445, "learning_rate": 4.3264956466489504e-05, "loss": 0.5557, "step": 12020 }, { "epoch": 1.2049381479441077, "grad_norm": 2.4161806106567383, "learning_rate": 4.325418856807529e-05, "loss": 0.6732, "step": 12030 }, { "epoch": 1.2059398006711073, "grad_norm": 2.803616523742676, "learning_rate": 4.324341341102843e-05, "loss": 0.5855, "step": 12040 }, { "epoch": 1.2069414533981069, "grad_norm": 2.3754525184631348, "learning_rate": 4.3232630999633595e-05, "loss": 0.6923, "step": 12050 }, { "epoch": 1.2079431061251065, "grad_norm": 2.4744246006011963, "learning_rate": 4.3221841338178316e-05, "loss": 0.646, "step": 12060 }, { "epoch": 1.208944758852106, "grad_norm": 2.2677700519561768, "learning_rate": 4.321104443095302e-05, "loss": 0.5519, "step": 12070 }, { "epoch": 1.2099464115791054, "grad_norm": 2.31657075881958, "learning_rate": 4.3200240282251005e-05, "loss": 0.5602, "step": 12080 }, { "epoch": 1.210948064306105, "grad_norm": 2.340644598007202, "learning_rate": 4.3189428896368456e-05, "loss": 0.6619, "step": 12090 }, { "epoch": 1.2119497170331046, "grad_norm": 2.118570566177368, "learning_rate": 4.317861027760444e-05, "loss": 0.5768, "step": 12100 }, { "epoch": 1.2129513697601042, "grad_norm": 2.9038562774658203, "learning_rate": 4.3167784430260895e-05, "loss": 0.5889, "step": 12110 }, { "epoch": 1.2139530224871038, "grad_norm": 2.5005056858062744, "learning_rate": 4.3156951358642626e-05, "loss": 0.5664, "step": 12120 }, { "epoch": 1.2149546752141034, "grad_norm": 2.4353575706481934, "learning_rate": 4.314611106705732e-05, "loss": 0.6251, "step": 12130 }, { "epoch": 1.2159563279411028, "grad_norm": 2.5719215869903564, "learning_rate": 4.313526355981554e-05, "loss": 0.6044, "step": 12140 }, { "epoch": 1.2169579806681023, "grad_norm": 2.214345932006836, "learning_rate": 4.3124408841230696e-05, "loss": 0.5722, "step": 12150 }, { "epoch": 1.217959633395102, "grad_norm": 2.1811490058898926, "learning_rate": 4.3113546915619095e-05, "loss": 0.5788, "step": 12160 }, { "epoch": 1.2189612861221015, "grad_norm": 2.520916700363159, "learning_rate": 4.3102677787299886e-05, "loss": 0.6075, "step": 12170 }, { "epoch": 1.219962938849101, "grad_norm": 2.66408634185791, "learning_rate": 4.309180146059509e-05, "loss": 0.5751, "step": 12180 }, { "epoch": 1.2209645915761005, "grad_norm": 2.4396579265594482, "learning_rate": 4.3080917939829604e-05, "loss": 0.5761, "step": 12190 }, { "epoch": 1.2219662443031, "grad_norm": 1.9858520030975342, "learning_rate": 4.3070027229331155e-05, "loss": 0.617, "step": 12200 }, { "epoch": 1.2229678970300997, "grad_norm": 2.1379098892211914, "learning_rate": 4.305912933343037e-05, "loss": 0.5834, "step": 12210 }, { "epoch": 1.2239695497570993, "grad_norm": 2.563939094543457, "learning_rate": 4.304822425646069e-05, "loss": 0.569, "step": 12220 }, { "epoch": 1.2249712024840989, "grad_norm": 2.471806287765503, "learning_rate": 4.303731200275844e-05, "loss": 0.5577, "step": 12230 }, { "epoch": 1.2259728552110982, "grad_norm": 2.36328387260437, "learning_rate": 4.302639257666279e-05, "loss": 0.6297, "step": 12240 }, { "epoch": 1.2269745079380978, "grad_norm": 2.4426753520965576, "learning_rate": 4.3015465982515765e-05, "loss": 0.5597, "step": 12250 }, { "epoch": 1.2279761606650974, "grad_norm": 2.5594425201416016, "learning_rate": 4.300453222466224e-05, "loss": 0.5633, "step": 12260 }, { "epoch": 1.228977813392097, "grad_norm": 2.1706836223602295, "learning_rate": 4.299359130744993e-05, "loss": 0.5809, "step": 12270 }, { "epoch": 1.2299794661190966, "grad_norm": 2.1926965713500977, "learning_rate": 4.298264323522941e-05, "loss": 0.5436, "step": 12280 }, { "epoch": 1.230981118846096, "grad_norm": 3.1730353832244873, "learning_rate": 4.297168801235409e-05, "loss": 0.6721, "step": 12290 }, { "epoch": 1.2319827715730955, "grad_norm": 2.2085063457489014, "learning_rate": 4.296072564318023e-05, "loss": 0.5455, "step": 12300 }, { "epoch": 1.2329844243000951, "grad_norm": 2.361359119415283, "learning_rate": 4.2949756132066924e-05, "loss": 0.551, "step": 12310 }, { "epoch": 1.2339860770270947, "grad_norm": 2.3133087158203125, "learning_rate": 4.29387794833761e-05, "loss": 0.6182, "step": 12320 }, { "epoch": 1.2349877297540943, "grad_norm": 2.7090024948120117, "learning_rate": 4.2927795701472564e-05, "loss": 0.6011, "step": 12330 }, { "epoch": 1.235989382481094, "grad_norm": 2.7313599586486816, "learning_rate": 4.291680479072391e-05, "loss": 0.6484, "step": 12340 }, { "epoch": 1.2369910352080933, "grad_norm": 2.4200663566589355, "learning_rate": 4.290580675550059e-05, "loss": 0.6089, "step": 12350 }, { "epoch": 1.2379926879350929, "grad_norm": 2.2249794006347656, "learning_rate": 4.2894801600175885e-05, "loss": 0.609, "step": 12360 }, { "epoch": 1.2389943406620925, "grad_norm": 2.8315131664276123, "learning_rate": 4.2883789329125894e-05, "loss": 0.551, "step": 12370 }, { "epoch": 1.239995993389092, "grad_norm": 2.9285271167755127, "learning_rate": 4.287276994672959e-05, "loss": 0.5966, "step": 12380 }, { "epoch": 1.2409976461160916, "grad_norm": 2.0900344848632812, "learning_rate": 4.286174345736871e-05, "loss": 0.488, "step": 12390 }, { "epoch": 1.241999298843091, "grad_norm": 2.371091604232788, "learning_rate": 4.285070986542787e-05, "loss": 0.5586, "step": 12400 }, { "epoch": 1.2430009515700906, "grad_norm": 2.1431174278259277, "learning_rate": 4.283966917529448e-05, "loss": 0.5699, "step": 12410 }, { "epoch": 1.2440026042970902, "grad_norm": 2.3268375396728516, "learning_rate": 4.282862139135879e-05, "loss": 0.5179, "step": 12420 }, { "epoch": 1.2450042570240898, "grad_norm": 2.512864112854004, "learning_rate": 4.281756651801386e-05, "loss": 0.6027, "step": 12430 }, { "epoch": 1.2460059097510894, "grad_norm": 3.174145221710205, "learning_rate": 4.280650455965557e-05, "loss": 0.5509, "step": 12440 }, { "epoch": 1.247007562478089, "grad_norm": 2.1517832279205322, "learning_rate": 4.279543552068263e-05, "loss": 0.618, "step": 12450 }, { "epoch": 1.2480092152050883, "grad_norm": 2.6362404823303223, "learning_rate": 4.278435940549653e-05, "loss": 0.5599, "step": 12460 }, { "epoch": 1.249010867932088, "grad_norm": 2.2011988162994385, "learning_rate": 4.277327621850162e-05, "loss": 0.5423, "step": 12470 }, { "epoch": 1.2500125206590875, "grad_norm": 2.244523286819458, "learning_rate": 4.2762185964105045e-05, "loss": 0.5017, "step": 12480 }, { "epoch": 1.2510141733860871, "grad_norm": 2.230452060699463, "learning_rate": 4.275108864671674e-05, "loss": 0.5543, "step": 12490 }, { "epoch": 1.2520158261130865, "grad_norm": 2.8010337352752686, "learning_rate": 4.273998427074948e-05, "loss": 0.6166, "step": 12500 }, { "epoch": 1.253017478840086, "grad_norm": 2.4748897552490234, "learning_rate": 4.2728872840618814e-05, "loss": 0.61, "step": 12510 }, { "epoch": 1.2540191315670857, "grad_norm": 2.357698917388916, "learning_rate": 4.271775436074313e-05, "loss": 0.5702, "step": 12520 }, { "epoch": 1.2550207842940853, "grad_norm": 2.1585423946380615, "learning_rate": 4.270662883554361e-05, "loss": 0.5865, "step": 12530 }, { "epoch": 1.2560224370210848, "grad_norm": 2.516031503677368, "learning_rate": 4.2695496269444196e-05, "loss": 0.6007, "step": 12540 }, { "epoch": 1.2570240897480844, "grad_norm": 2.1451737880706787, "learning_rate": 4.2684356666871696e-05, "loss": 0.6683, "step": 12550 }, { "epoch": 1.2580257424750838, "grad_norm": 2.237550735473633, "learning_rate": 4.267321003225567e-05, "loss": 0.5501, "step": 12560 }, { "epoch": 1.2590273952020834, "grad_norm": 2.1487345695495605, "learning_rate": 4.266205637002849e-05, "loss": 0.6097, "step": 12570 }, { "epoch": 1.260029047929083, "grad_norm": 1.9155800342559814, "learning_rate": 4.2650895684625325e-05, "loss": 0.5589, "step": 12580 }, { "epoch": 1.2610307006560826, "grad_norm": 2.6508138179779053, "learning_rate": 4.263972798048413e-05, "loss": 0.624, "step": 12590 }, { "epoch": 1.262032353383082, "grad_norm": 3.7485527992248535, "learning_rate": 4.262855326204565e-05, "loss": 0.5505, "step": 12600 }, { "epoch": 1.2630340061100815, "grad_norm": 2.4273035526275635, "learning_rate": 4.2617371533753445e-05, "loss": 0.5796, "step": 12610 }, { "epoch": 1.2640356588370811, "grad_norm": 2.114297866821289, "learning_rate": 4.2606182800053806e-05, "loss": 0.614, "step": 12620 }, { "epoch": 1.2650373115640807, "grad_norm": 2.203754425048828, "learning_rate": 4.259498706539586e-05, "loss": 0.5348, "step": 12630 }, { "epoch": 1.2660389642910803, "grad_norm": 2.6585121154785156, "learning_rate": 4.258378433423152e-05, "loss": 0.5813, "step": 12640 }, { "epoch": 1.26704061701808, "grad_norm": 3.2380688190460205, "learning_rate": 4.257257461101542e-05, "loss": 0.6336, "step": 12650 }, { "epoch": 1.2680422697450795, "grad_norm": 2.2499895095825195, "learning_rate": 4.256135790020506e-05, "loss": 0.6303, "step": 12660 }, { "epoch": 1.2690439224720789, "grad_norm": 1.870112419128418, "learning_rate": 4.255013420626064e-05, "loss": 0.622, "step": 12670 }, { "epoch": 1.2700455751990785, "grad_norm": 2.667515277862549, "learning_rate": 4.2538903533645206e-05, "loss": 0.5994, "step": 12680 }, { "epoch": 1.271047227926078, "grad_norm": 2.1761505603790283, "learning_rate": 4.252766588682452e-05, "loss": 0.5713, "step": 12690 }, { "epoch": 1.2720488806530776, "grad_norm": 2.370919704437256, "learning_rate": 4.251642127026715e-05, "loss": 0.5416, "step": 12700 }, { "epoch": 1.273050533380077, "grad_norm": 1.990342617034912, "learning_rate": 4.2505169688444435e-05, "loss": 0.5408, "step": 12710 }, { "epoch": 1.2740521861070766, "grad_norm": 2.1336588859558105, "learning_rate": 4.2493911145830464e-05, "loss": 0.6172, "step": 12720 }, { "epoch": 1.2750538388340762, "grad_norm": 2.30049467086792, "learning_rate": 4.248264564690212e-05, "loss": 0.5551, "step": 12730 }, { "epoch": 1.2760554915610758, "grad_norm": 2.0460050106048584, "learning_rate": 4.247137319613904e-05, "loss": 0.5742, "step": 12740 }, { "epoch": 1.2770571442880754, "grad_norm": 2.2391974925994873, "learning_rate": 4.246009379802361e-05, "loss": 0.5554, "step": 12750 }, { "epoch": 1.278058797015075, "grad_norm": 2.327256917953491, "learning_rate": 4.2448807457041006e-05, "loss": 0.5254, "step": 12760 }, { "epoch": 1.2790604497420746, "grad_norm": 3.1748154163360596, "learning_rate": 4.243751417767915e-05, "loss": 0.5259, "step": 12770 }, { "epoch": 1.280062102469074, "grad_norm": 2.3406453132629395, "learning_rate": 4.2426213964428704e-05, "loss": 0.5984, "step": 12780 }, { "epoch": 1.2810637551960735, "grad_norm": 2.08475399017334, "learning_rate": 4.241490682178314e-05, "loss": 0.5541, "step": 12790 }, { "epoch": 1.282065407923073, "grad_norm": 2.3705079555511475, "learning_rate": 4.240359275423863e-05, "loss": 0.6299, "step": 12800 }, { "epoch": 1.2830670606500727, "grad_norm": 2.232518196105957, "learning_rate": 4.239227176629413e-05, "loss": 0.5389, "step": 12810 }, { "epoch": 1.284068713377072, "grad_norm": 2.1422078609466553, "learning_rate": 4.238094386245134e-05, "loss": 0.5609, "step": 12820 }, { "epoch": 1.2850703661040717, "grad_norm": 2.5519092082977295, "learning_rate": 4.236960904721472e-05, "loss": 0.5631, "step": 12830 }, { "epoch": 1.2860720188310713, "grad_norm": 3.0536038875579834, "learning_rate": 4.2358267325091456e-05, "loss": 0.5331, "step": 12840 }, { "epoch": 1.2870736715580708, "grad_norm": 2.136219024658203, "learning_rate": 4.2346918700591497e-05, "loss": 0.6137, "step": 12850 }, { "epoch": 1.2880753242850704, "grad_norm": 2.4962804317474365, "learning_rate": 4.2335563178227544e-05, "loss": 0.5247, "step": 12860 }, { "epoch": 1.28907697701207, "grad_norm": 3.05644154548645, "learning_rate": 4.232420076251501e-05, "loss": 0.5721, "step": 12870 }, { "epoch": 1.2900786297390694, "grad_norm": 2.8292741775512695, "learning_rate": 4.231283145797208e-05, "loss": 0.5837, "step": 12880 }, { "epoch": 1.291080282466069, "grad_norm": 3.002732753753662, "learning_rate": 4.2301455269119665e-05, "loss": 0.5213, "step": 12890 }, { "epoch": 1.2920819351930686, "grad_norm": 2.6589910984039307, "learning_rate": 4.229007220048142e-05, "loss": 0.5508, "step": 12900 }, { "epoch": 1.2930835879200682, "grad_norm": 2.5175070762634277, "learning_rate": 4.227868225658373e-05, "loss": 0.5646, "step": 12910 }, { "epoch": 1.2940852406470675, "grad_norm": 2.4398906230926514, "learning_rate": 4.226728544195572e-05, "loss": 0.5605, "step": 12920 }, { "epoch": 1.2950868933740671, "grad_norm": 1.8536179065704346, "learning_rate": 4.225588176112922e-05, "loss": 0.5898, "step": 12930 }, { "epoch": 1.2960885461010667, "grad_norm": 2.7143056392669678, "learning_rate": 4.224447121863885e-05, "loss": 0.6005, "step": 12940 }, { "epoch": 1.2970901988280663, "grad_norm": 2.5145957469940186, "learning_rate": 4.223305381902189e-05, "loss": 0.5204, "step": 12950 }, { "epoch": 1.298091851555066, "grad_norm": 1.898524522781372, "learning_rate": 4.222162956681839e-05, "loss": 0.5403, "step": 12960 }, { "epoch": 1.2990935042820655, "grad_norm": 2.350985527038574, "learning_rate": 4.221019846657112e-05, "loss": 0.5891, "step": 12970 }, { "epoch": 1.300095157009065, "grad_norm": 2.1936440467834473, "learning_rate": 4.219876052282555e-05, "loss": 0.5486, "step": 12980 }, { "epoch": 1.3010968097360645, "grad_norm": 2.114114999771118, "learning_rate": 4.21873157401299e-05, "loss": 0.6083, "step": 12990 }, { "epoch": 1.302098462463064, "grad_norm": 2.300907850265503, "learning_rate": 4.2175864123035085e-05, "loss": 0.6078, "step": 13000 }, { "epoch": 1.3031001151900636, "grad_norm": 2.4883675575256348, "learning_rate": 4.2164405676094766e-05, "loss": 0.5808, "step": 13010 }, { "epoch": 1.3041017679170632, "grad_norm": 2.3520798683166504, "learning_rate": 4.215294040386528e-05, "loss": 0.6279, "step": 13020 }, { "epoch": 1.3051034206440626, "grad_norm": 2.238900661468506, "learning_rate": 4.214146831090572e-05, "loss": 0.6265, "step": 13030 }, { "epoch": 1.3061050733710622, "grad_norm": 2.766570806503296, "learning_rate": 4.2129989401777876e-05, "loss": 0.6319, "step": 13040 }, { "epoch": 1.3071067260980618, "grad_norm": 2.1312856674194336, "learning_rate": 4.211850368104623e-05, "loss": 0.5227, "step": 13050 }, { "epoch": 1.3081083788250614, "grad_norm": 2.324946165084839, "learning_rate": 4.210701115327799e-05, "loss": 0.5999, "step": 13060 }, { "epoch": 1.309110031552061, "grad_norm": 2.1552188396453857, "learning_rate": 4.2095511823043064e-05, "loss": 0.5514, "step": 13070 }, { "epoch": 1.3101116842790606, "grad_norm": 2.5594635009765625, "learning_rate": 4.208400569491408e-05, "loss": 0.5999, "step": 13080 }, { "epoch": 1.31111333700606, "grad_norm": 1.8165738582611084, "learning_rate": 4.2072492773466366e-05, "loss": 0.5742, "step": 13090 }, { "epoch": 1.3121149897330595, "grad_norm": 2.341984987258911, "learning_rate": 4.2060973063277924e-05, "loss": 0.5683, "step": 13100 }, { "epoch": 1.313116642460059, "grad_norm": 2.9836859703063965, "learning_rate": 4.204944656892948e-05, "loss": 0.6223, "step": 13110 }, { "epoch": 1.3141182951870587, "grad_norm": 2.9429287910461426, "learning_rate": 4.203791329500446e-05, "loss": 0.5961, "step": 13120 }, { "epoch": 1.315119947914058, "grad_norm": 2.433025598526001, "learning_rate": 4.202637324608897e-05, "loss": 0.5167, "step": 13130 }, { "epoch": 1.3161216006410577, "grad_norm": 2.171271800994873, "learning_rate": 4.2014826426771825e-05, "loss": 0.5361, "step": 13140 }, { "epoch": 1.3171232533680572, "grad_norm": 2.2172558307647705, "learning_rate": 4.2003272841644525e-05, "loss": 0.5201, "step": 13150 }, { "epoch": 1.3181249060950568, "grad_norm": 2.2057085037231445, "learning_rate": 4.199171249530125e-05, "loss": 0.5465, "step": 13160 }, { "epoch": 1.3191265588220564, "grad_norm": 2.7693426609039307, "learning_rate": 4.1980145392338896e-05, "loss": 0.586, "step": 13170 }, { "epoch": 1.320128211549056, "grad_norm": 2.1529502868652344, "learning_rate": 4.196857153735702e-05, "loss": 0.5835, "step": 13180 }, { "epoch": 1.3211298642760556, "grad_norm": 2.1191604137420654, "learning_rate": 4.195699093495788e-05, "loss": 0.6044, "step": 13190 }, { "epoch": 1.322131517003055, "grad_norm": 1.6969443559646606, "learning_rate": 4.194540358974639e-05, "loss": 0.5782, "step": 13200 }, { "epoch": 1.3231331697300546, "grad_norm": 2.8751461505889893, "learning_rate": 4.19338095063302e-05, "loss": 0.5474, "step": 13210 }, { "epoch": 1.3241348224570542, "grad_norm": 1.8890577554702759, "learning_rate": 4.192220868931958e-05, "loss": 0.5827, "step": 13220 }, { "epoch": 1.3251364751840538, "grad_norm": 2.4971513748168945, "learning_rate": 4.1910601143327496e-05, "loss": 0.566, "step": 13230 }, { "epoch": 1.3261381279110531, "grad_norm": 2.1435883045196533, "learning_rate": 4.1898986872969626e-05, "loss": 0.5873, "step": 13240 }, { "epoch": 1.3271397806380527, "grad_norm": 2.1177799701690674, "learning_rate": 4.188736588286426e-05, "loss": 0.5642, "step": 13250 }, { "epoch": 1.3281414333650523, "grad_norm": 2.0428977012634277, "learning_rate": 4.187573817763242e-05, "loss": 0.5201, "step": 13260 }, { "epoch": 1.329143086092052, "grad_norm": 2.122833490371704, "learning_rate": 4.1864103761897746e-05, "loss": 0.5834, "step": 13270 }, { "epoch": 1.3301447388190515, "grad_norm": 2.3523943424224854, "learning_rate": 4.185246264028659e-05, "loss": 0.5832, "step": 13280 }, { "epoch": 1.331146391546051, "grad_norm": 1.596888780593872, "learning_rate": 4.184081481742794e-05, "loss": 0.6354, "step": 13290 }, { "epoch": 1.3321480442730507, "grad_norm": 2.279982328414917, "learning_rate": 4.182916029795346e-05, "loss": 0.5331, "step": 13300 }, { "epoch": 1.33314969700005, "grad_norm": 2.457702875137329, "learning_rate": 4.181749908649748e-05, "loss": 0.6325, "step": 13310 }, { "epoch": 1.3341513497270496, "grad_norm": 1.8950936794281006, "learning_rate": 4.180583118769699e-05, "loss": 0.5559, "step": 13320 }, { "epoch": 1.3351530024540492, "grad_norm": 2.870814561843872, "learning_rate": 4.179415660619164e-05, "loss": 0.5552, "step": 13330 }, { "epoch": 1.3361546551810488, "grad_norm": 1.7458791732788086, "learning_rate": 4.178247534662372e-05, "loss": 0.5206, "step": 13340 }, { "epoch": 1.3371563079080482, "grad_norm": 2.9492814540863037, "learning_rate": 4.17707874136382e-05, "loss": 0.499, "step": 13350 }, { "epoch": 1.3381579606350478, "grad_norm": 2.308802604675293, "learning_rate": 4.1759092811882696e-05, "loss": 0.6022, "step": 13360 }, { "epoch": 1.3391596133620474, "grad_norm": 1.9808357954025269, "learning_rate": 4.174739154600746e-05, "loss": 0.5856, "step": 13370 }, { "epoch": 1.340161266089047, "grad_norm": 2.5292766094207764, "learning_rate": 4.173568362066542e-05, "loss": 0.6017, "step": 13380 }, { "epoch": 1.3411629188160465, "grad_norm": 1.9373970031738281, "learning_rate": 4.172396904051215e-05, "loss": 0.5282, "step": 13390 }, { "epoch": 1.3421645715430461, "grad_norm": 3.495178699493408, "learning_rate": 4.1712247810205824e-05, "loss": 0.5122, "step": 13400 }, { "epoch": 1.3431662242700455, "grad_norm": 2.2471678256988525, "learning_rate": 4.170051993440733e-05, "loss": 0.5307, "step": 13410 }, { "epoch": 1.344167876997045, "grad_norm": 2.055947780609131, "learning_rate": 4.1688785417780155e-05, "loss": 0.5769, "step": 13420 }, { "epoch": 1.3451695297240447, "grad_norm": 2.5820884704589844, "learning_rate": 4.167704426499042e-05, "loss": 0.5912, "step": 13430 }, { "epoch": 1.3461711824510443, "grad_norm": 2.5380706787109375, "learning_rate": 4.1665296480706917e-05, "loss": 0.6127, "step": 13440 }, { "epoch": 1.3471728351780436, "grad_norm": 2.453075885772705, "learning_rate": 4.1653542069601055e-05, "loss": 0.5635, "step": 13450 }, { "epoch": 1.3481744879050432, "grad_norm": 2.359921932220459, "learning_rate": 4.164178103634688e-05, "loss": 0.5789, "step": 13460 }, { "epoch": 1.3491761406320428, "grad_norm": 2.4070520401000977, "learning_rate": 4.163001338562108e-05, "loss": 0.6129, "step": 13470 }, { "epoch": 1.3501777933590424, "grad_norm": 2.7085323333740234, "learning_rate": 4.1618239122102965e-05, "loss": 0.6108, "step": 13480 }, { "epoch": 1.351179446086042, "grad_norm": 2.0704357624053955, "learning_rate": 4.160645825047447e-05, "loss": 0.6244, "step": 13490 }, { "epoch": 1.3521810988130416, "grad_norm": 2.2796645164489746, "learning_rate": 4.159467077542016e-05, "loss": 0.5491, "step": 13500 }, { "epoch": 1.3531827515400412, "grad_norm": 2.2350540161132812, "learning_rate": 4.158287670162725e-05, "loss": 0.5411, "step": 13510 }, { "epoch": 1.3541844042670406, "grad_norm": 2.277599334716797, "learning_rate": 4.1571076033785556e-05, "loss": 0.5953, "step": 13520 }, { "epoch": 1.3551860569940402, "grad_norm": 2.4963531494140625, "learning_rate": 4.155926877658751e-05, "loss": 0.5321, "step": 13530 }, { "epoch": 1.3561877097210397, "grad_norm": 1.7574398517608643, "learning_rate": 4.154745493472817e-05, "loss": 0.5299, "step": 13540 }, { "epoch": 1.3571893624480393, "grad_norm": 2.508462905883789, "learning_rate": 4.1535634512905225e-05, "loss": 0.5877, "step": 13550 }, { "epoch": 1.3581910151750387, "grad_norm": 3.9622373580932617, "learning_rate": 4.152380751581897e-05, "loss": 0.5962, "step": 13560 }, { "epoch": 1.3591926679020383, "grad_norm": 2.56437087059021, "learning_rate": 4.151197394817231e-05, "loss": 0.5113, "step": 13570 }, { "epoch": 1.360194320629038, "grad_norm": 3.0979878902435303, "learning_rate": 4.150013381467078e-05, "loss": 0.5881, "step": 13580 }, { "epoch": 1.3611959733560375, "grad_norm": 2.280622959136963, "learning_rate": 4.148828712002252e-05, "loss": 0.6048, "step": 13590 }, { "epoch": 1.362197626083037, "grad_norm": 2.422917366027832, "learning_rate": 4.147643386893825e-05, "loss": 0.6643, "step": 13600 }, { "epoch": 1.3631992788100367, "grad_norm": 1.9158912897109985, "learning_rate": 4.146457406613134e-05, "loss": 0.5696, "step": 13610 }, { "epoch": 1.364200931537036, "grad_norm": 2.052349328994751, "learning_rate": 4.145270771631773e-05, "loss": 0.5746, "step": 13620 }, { "epoch": 1.3652025842640356, "grad_norm": 3.113779306411743, "learning_rate": 4.144083482421599e-05, "loss": 0.5859, "step": 13630 }, { "epoch": 1.3662042369910352, "grad_norm": 2.7686078548431396, "learning_rate": 4.1428955394547286e-05, "loss": 0.5829, "step": 13640 }, { "epoch": 1.3672058897180348, "grad_norm": 2.467316150665283, "learning_rate": 4.141706943203537e-05, "loss": 0.51, "step": 13650 }, { "epoch": 1.3682075424450342, "grad_norm": 2.354332447052002, "learning_rate": 4.140517694140661e-05, "loss": 0.5914, "step": 13660 }, { "epoch": 1.3692091951720338, "grad_norm": 2.3805387020111084, "learning_rate": 4.1393277927389946e-05, "loss": 0.6216, "step": 13670 }, { "epoch": 1.3702108478990334, "grad_norm": 1.6980246305465698, "learning_rate": 4.138137239471693e-05, "loss": 0.65, "step": 13680 }, { "epoch": 1.371212500626033, "grad_norm": 2.8044466972351074, "learning_rate": 4.136946034812171e-05, "loss": 0.5768, "step": 13690 }, { "epoch": 1.3722141533530325, "grad_norm": 2.284212112426758, "learning_rate": 4.135754179234102e-05, "loss": 0.548, "step": 13700 }, { "epoch": 1.3732158060800321, "grad_norm": 2.20108699798584, "learning_rate": 4.134561673211417e-05, "loss": 0.5497, "step": 13710 }, { "epoch": 1.3742174588070317, "grad_norm": 2.3220138549804688, "learning_rate": 4.133368517218305e-05, "loss": 0.6195, "step": 13720 }, { "epoch": 1.375219111534031, "grad_norm": 2.1386282444000244, "learning_rate": 4.132174711729217e-05, "loss": 0.563, "step": 13730 }, { "epoch": 1.3762207642610307, "grad_norm": 3.0128397941589355, "learning_rate": 4.130980257218861e-05, "loss": 0.5758, "step": 13740 }, { "epoch": 1.3772224169880303, "grad_norm": 2.1034698486328125, "learning_rate": 4.129785154162201e-05, "loss": 0.536, "step": 13750 }, { "epoch": 1.3782240697150299, "grad_norm": 2.1103596687316895, "learning_rate": 4.12858940303446e-05, "loss": 0.5476, "step": 13760 }, { "epoch": 1.3792257224420292, "grad_norm": 2.607357978820801, "learning_rate": 4.1273930043111185e-05, "loss": 0.5823, "step": 13770 }, { "epoch": 1.3802273751690288, "grad_norm": 2.7858710289001465, "learning_rate": 4.1261959584679156e-05, "loss": 0.5756, "step": 13780 }, { "epoch": 1.3812290278960284, "grad_norm": 2.6473164558410645, "learning_rate": 4.124998265980848e-05, "loss": 0.6113, "step": 13790 }, { "epoch": 1.382230680623028, "grad_norm": 2.2737371921539307, "learning_rate": 4.1237999273261676e-05, "loss": 0.6199, "step": 13800 }, { "epoch": 1.3832323333500276, "grad_norm": 2.420541524887085, "learning_rate": 4.1226009429803836e-05, "loss": 0.5491, "step": 13810 }, { "epoch": 1.3842339860770272, "grad_norm": 1.7622172832489014, "learning_rate": 4.121401313420264e-05, "loss": 0.4744, "step": 13820 }, { "epoch": 1.3852356388040268, "grad_norm": 2.3782007694244385, "learning_rate": 4.1202010391228306e-05, "loss": 0.6206, "step": 13830 }, { "epoch": 1.3862372915310262, "grad_norm": 2.8256185054779053, "learning_rate": 4.1190001205653636e-05, "loss": 0.5888, "step": 13840 }, { "epoch": 1.3872389442580257, "grad_norm": 2.546830177307129, "learning_rate": 4.117798558225399e-05, "loss": 0.5943, "step": 13850 }, { "epoch": 1.3882405969850253, "grad_norm": 3.37953782081604, "learning_rate": 4.116596352580728e-05, "loss": 0.5787, "step": 13860 }, { "epoch": 1.389242249712025, "grad_norm": 2.2097134590148926, "learning_rate": 4.1153935041093974e-05, "loss": 0.5856, "step": 13870 }, { "epoch": 1.3902439024390243, "grad_norm": 2.6662843227386475, "learning_rate": 4.114190013289712e-05, "loss": 0.5243, "step": 13880 }, { "epoch": 1.3912455551660239, "grad_norm": 1.8162366151809692, "learning_rate": 4.112985880600229e-05, "loss": 0.5903, "step": 13890 }, { "epoch": 1.3922472078930235, "grad_norm": 2.9583468437194824, "learning_rate": 4.111781106519763e-05, "loss": 0.5232, "step": 13900 }, { "epoch": 1.393248860620023, "grad_norm": 2.8035693168640137, "learning_rate": 4.1105756915273826e-05, "loss": 0.5892, "step": 13910 }, { "epoch": 1.3942505133470227, "grad_norm": 2.036820650100708, "learning_rate": 4.10936963610241e-05, "loss": 0.5729, "step": 13920 }, { "epoch": 1.3952521660740222, "grad_norm": 2.5496792793273926, "learning_rate": 4.108162940724427e-05, "loss": 0.5305, "step": 13930 }, { "epoch": 1.3962538188010216, "grad_norm": 2.4634103775024414, "learning_rate": 4.1069556058732624e-05, "loss": 0.6228, "step": 13940 }, { "epoch": 1.3972554715280212, "grad_norm": 1.6737140417099, "learning_rate": 4.105747632029006e-05, "loss": 0.5499, "step": 13950 }, { "epoch": 1.3982571242550208, "grad_norm": 2.705306053161621, "learning_rate": 4.104539019671997e-05, "loss": 0.5735, "step": 13960 }, { "epoch": 1.3992587769820204, "grad_norm": 2.2632908821105957, "learning_rate": 4.103329769282832e-05, "loss": 0.5479, "step": 13970 }, { "epoch": 1.4002604297090198, "grad_norm": 1.8262296915054321, "learning_rate": 4.10211988134236e-05, "loss": 0.5874, "step": 13980 }, { "epoch": 1.4012620824360194, "grad_norm": 2.7786033153533936, "learning_rate": 4.100909356331682e-05, "loss": 0.5826, "step": 13990 }, { "epoch": 1.402263735163019, "grad_norm": 2.1953024864196777, "learning_rate": 4.099698194732154e-05, "loss": 0.5816, "step": 14000 }, { "epoch": 1.4032653878900185, "grad_norm": 2.1544125080108643, "learning_rate": 4.098486397025386e-05, "loss": 0.5336, "step": 14010 }, { "epoch": 1.4042670406170181, "grad_norm": 2.1569459438323975, "learning_rate": 4.097273963693239e-05, "loss": 0.5122, "step": 14020 }, { "epoch": 1.4052686933440177, "grad_norm": 2.009190559387207, "learning_rate": 4.096060895217826e-05, "loss": 0.5175, "step": 14030 }, { "epoch": 1.4062703460710173, "grad_norm": 2.4000799655914307, "learning_rate": 4.094847192081516e-05, "loss": 0.5778, "step": 14040 }, { "epoch": 1.4072719987980167, "grad_norm": 1.9345402717590332, "learning_rate": 4.0936328547669264e-05, "loss": 0.5855, "step": 14050 }, { "epoch": 1.4082736515250163, "grad_norm": 2.416372060775757, "learning_rate": 4.09241788375693e-05, "loss": 0.627, "step": 14060 }, { "epoch": 1.4092753042520159, "grad_norm": 2.3259639739990234, "learning_rate": 4.091202279534651e-05, "loss": 0.5788, "step": 14070 }, { "epoch": 1.4102769569790155, "grad_norm": 2.430366277694702, "learning_rate": 4.089986042583465e-05, "loss": 0.5928, "step": 14080 }, { "epoch": 1.4112786097060148, "grad_norm": 2.045180320739746, "learning_rate": 4.088769173386996e-05, "loss": 0.6082, "step": 14090 }, { "epoch": 1.4122802624330144, "grad_norm": 2.674597978591919, "learning_rate": 4.0875516724291255e-05, "loss": 0.5751, "step": 14100 }, { "epoch": 1.413281915160014, "grad_norm": 2.7040843963623047, "learning_rate": 4.0863335401939815e-05, "loss": 0.5585, "step": 14110 }, { "epoch": 1.4142835678870136, "grad_norm": 2.3586766719818115, "learning_rate": 4.085114777165945e-05, "loss": 0.519, "step": 14120 }, { "epoch": 1.4152852206140132, "grad_norm": 1.9052765369415283, "learning_rate": 4.0838953838296464e-05, "loss": 0.5796, "step": 14130 }, { "epoch": 1.4162868733410128, "grad_norm": 1.9968295097351074, "learning_rate": 4.08267536066997e-05, "loss": 0.5678, "step": 14140 }, { "epoch": 1.4172885260680121, "grad_norm": 2.5665669441223145, "learning_rate": 4.081454708172047e-05, "loss": 0.5907, "step": 14150 }, { "epoch": 1.4182901787950117, "grad_norm": 2.1774866580963135, "learning_rate": 4.080233426821259e-05, "loss": 0.5392, "step": 14160 }, { "epoch": 1.4192918315220113, "grad_norm": 2.24665904045105, "learning_rate": 4.079011517103241e-05, "loss": 0.5701, "step": 14170 }, { "epoch": 1.420293484249011, "grad_norm": 2.3016774654388428, "learning_rate": 4.0777889795038736e-05, "loss": 0.5526, "step": 14180 }, { "epoch": 1.4212951369760103, "grad_norm": 3.0300395488739014, "learning_rate": 4.07656581450929e-05, "loss": 0.5284, "step": 14190 }, { "epoch": 1.4222967897030099, "grad_norm": 2.790093421936035, "learning_rate": 4.0753420226058724e-05, "loss": 0.5558, "step": 14200 }, { "epoch": 1.4232984424300095, "grad_norm": 2.152724027633667, "learning_rate": 4.074117604280252e-05, "loss": 0.6086, "step": 14210 }, { "epoch": 1.424300095157009, "grad_norm": 2.25683856010437, "learning_rate": 4.0728925600193076e-05, "loss": 0.5255, "step": 14220 }, { "epoch": 1.4253017478840087, "grad_norm": 3.285686492919922, "learning_rate": 4.07166689031017e-05, "loss": 0.5968, "step": 14230 }, { "epoch": 1.4263034006110082, "grad_norm": 2.408635377883911, "learning_rate": 4.070440595640217e-05, "loss": 0.5599, "step": 14240 }, { "epoch": 1.4273050533380078, "grad_norm": 2.2016947269439697, "learning_rate": 4.069213676497073e-05, "loss": 0.5839, "step": 14250 }, { "epoch": 1.4283067060650072, "grad_norm": 2.815704107284546, "learning_rate": 4.067986133368614e-05, "loss": 0.6123, "step": 14260 }, { "epoch": 1.4293083587920068, "grad_norm": 2.9127774238586426, "learning_rate": 4.0667579667429625e-05, "loss": 0.5208, "step": 14270 }, { "epoch": 1.4303100115190064, "grad_norm": 2.2251622676849365, "learning_rate": 4.0655291771084896e-05, "loss": 0.5773, "step": 14280 }, { "epoch": 1.431311664246006, "grad_norm": 2.434687614440918, "learning_rate": 4.064299764953813e-05, "loss": 0.5782, "step": 14290 }, { "epoch": 1.4323133169730053, "grad_norm": 2.1257598400115967, "learning_rate": 4.0630697307678e-05, "loss": 0.5693, "step": 14300 }, { "epoch": 1.433314969700005, "grad_norm": 2.1355576515197754, "learning_rate": 4.061839075039562e-05, "loss": 0.5607, "step": 14310 }, { "epoch": 1.4343166224270045, "grad_norm": 2.9941141605377197, "learning_rate": 4.060607798258459e-05, "loss": 0.6015, "step": 14320 }, { "epoch": 1.4353182751540041, "grad_norm": 1.9984222650527954, "learning_rate": 4.059375900914102e-05, "loss": 0.5565, "step": 14330 }, { "epoch": 1.4363199278810037, "grad_norm": 2.6559393405914307, "learning_rate": 4.058143383496341e-05, "loss": 0.5428, "step": 14340 }, { "epoch": 1.4373215806080033, "grad_norm": 2.177220106124878, "learning_rate": 4.05691024649528e-05, "loss": 0.607, "step": 14350 }, { "epoch": 1.438323233335003, "grad_norm": 2.5836830139160156, "learning_rate": 4.055676490401264e-05, "loss": 0.5665, "step": 14360 }, { "epoch": 1.4393248860620023, "grad_norm": 3.489238977432251, "learning_rate": 4.0544421157048875e-05, "loss": 0.5957, "step": 14370 }, { "epoch": 1.4403265387890019, "grad_norm": 2.5777053833007812, "learning_rate": 4.053207122896989e-05, "loss": 0.5574, "step": 14380 }, { "epoch": 1.4413281915160014, "grad_norm": 2.3161325454711914, "learning_rate": 4.0519715124686535e-05, "loss": 0.5757, "step": 14390 }, { "epoch": 1.4423298442430008, "grad_norm": 2.5164365768432617, "learning_rate": 4.050735284911212e-05, "loss": 0.5554, "step": 14400 }, { "epoch": 1.4433314969700004, "grad_norm": 2.887172222137451, "learning_rate": 4.049498440716241e-05, "loss": 0.6013, "step": 14410 }, { "epoch": 1.444333149697, "grad_norm": 1.7889225482940674, "learning_rate": 4.0482609803755604e-05, "loss": 0.5402, "step": 14420 }, { "epoch": 1.4453348024239996, "grad_norm": 2.003990650177002, "learning_rate": 4.047022904381238e-05, "loss": 0.5742, "step": 14430 }, { "epoch": 1.4463364551509992, "grad_norm": 2.028975248336792, "learning_rate": 4.045784213225584e-05, "loss": 0.5762, "step": 14440 }, { "epoch": 1.4473381078779988, "grad_norm": 3.170283317565918, "learning_rate": 4.0445449074011535e-05, "loss": 0.5331, "step": 14450 }, { "epoch": 1.4483397606049984, "grad_norm": 2.5726683139801025, "learning_rate": 4.0433049874007475e-05, "loss": 0.5711, "step": 14460 }, { "epoch": 1.4493414133319977, "grad_norm": 2.1386616230010986, "learning_rate": 4.042064453717411e-05, "loss": 0.5832, "step": 14470 }, { "epoch": 1.4503430660589973, "grad_norm": 3.0277740955352783, "learning_rate": 4.040823306844431e-05, "loss": 0.5874, "step": 14480 }, { "epoch": 1.451344718785997, "grad_norm": 2.120304822921753, "learning_rate": 4.039581547275339e-05, "loss": 0.5645, "step": 14490 }, { "epoch": 1.4523463715129965, "grad_norm": 1.4980655908584595, "learning_rate": 4.038339175503914e-05, "loss": 0.5484, "step": 14500 }, { "epoch": 1.4533480242399959, "grad_norm": 2.6482741832733154, "learning_rate": 4.037096192024171e-05, "loss": 0.573, "step": 14510 }, { "epoch": 1.4543496769669955, "grad_norm": 2.6279139518737793, "learning_rate": 4.035852597330375e-05, "loss": 0.5177, "step": 14520 }, { "epoch": 1.455351329693995, "grad_norm": 2.7452750205993652, "learning_rate": 4.034608391917032e-05, "loss": 0.489, "step": 14530 }, { "epoch": 1.4563529824209946, "grad_norm": 1.965122103691101, "learning_rate": 4.033363576278889e-05, "loss": 0.575, "step": 14540 }, { "epoch": 1.4573546351479942, "grad_norm": 2.4850306510925293, "learning_rate": 4.0321181509109374e-05, "loss": 0.5551, "step": 14550 }, { "epoch": 1.4583562878749938, "grad_norm": 2.552098274230957, "learning_rate": 4.0308721163084105e-05, "loss": 0.5781, "step": 14560 }, { "epoch": 1.4593579406019934, "grad_norm": 2.414721727371216, "learning_rate": 4.029625472966785e-05, "loss": 0.5155, "step": 14570 }, { "epoch": 1.4603595933289928, "grad_norm": 2.572413444519043, "learning_rate": 4.028378221381778e-05, "loss": 0.5722, "step": 14580 }, { "epoch": 1.4613612460559924, "grad_norm": 2.4709455966949463, "learning_rate": 4.027130362049348e-05, "loss": 0.6083, "step": 14590 }, { "epoch": 1.462362898782992, "grad_norm": 2.268739700317383, "learning_rate": 4.025881895465699e-05, "loss": 0.541, "step": 14600 }, { "epoch": 1.4633645515099916, "grad_norm": 2.681468963623047, "learning_rate": 4.024632822127271e-05, "loss": 0.5514, "step": 14610 }, { "epoch": 1.464366204236991, "grad_norm": 2.1005759239196777, "learning_rate": 4.023383142530751e-05, "loss": 0.5443, "step": 14620 }, { "epoch": 1.4653678569639905, "grad_norm": 2.0630645751953125, "learning_rate": 4.022132857173061e-05, "loss": 0.5303, "step": 14630 }, { "epoch": 1.4663695096909901, "grad_norm": 2.8516407012939453, "learning_rate": 4.0208819665513684e-05, "loss": 0.5339, "step": 14640 }, { "epoch": 1.4673711624179897, "grad_norm": 2.4817099571228027, "learning_rate": 4.0196304711630805e-05, "loss": 0.5363, "step": 14650 }, { "epoch": 1.4683728151449893, "grad_norm": 3.3822922706604004, "learning_rate": 4.0183783715058444e-05, "loss": 0.528, "step": 14660 }, { "epoch": 1.4693744678719889, "grad_norm": 2.3686294555664062, "learning_rate": 4.017125668077546e-05, "loss": 0.5148, "step": 14670 }, { "epoch": 1.4703761205989883, "grad_norm": 2.2232017517089844, "learning_rate": 4.0158723613763145e-05, "loss": 0.5897, "step": 14680 }, { "epoch": 1.4713777733259878, "grad_norm": 2.1292519569396973, "learning_rate": 4.014618451900517e-05, "loss": 0.59, "step": 14690 }, { "epoch": 1.4723794260529874, "grad_norm": 2.4203102588653564, "learning_rate": 4.013363940148759e-05, "loss": 0.5647, "step": 14700 }, { "epoch": 1.473381078779987, "grad_norm": 1.9357800483703613, "learning_rate": 4.0121088266198906e-05, "loss": 0.5342, "step": 14710 }, { "epoch": 1.4743827315069864, "grad_norm": 2.3694651126861572, "learning_rate": 4.0108531118129934e-05, "loss": 0.5526, "step": 14720 }, { "epoch": 1.475384384233986, "grad_norm": 2.351116180419922, "learning_rate": 4.009596796227396e-05, "loss": 0.489, "step": 14730 }, { "epoch": 1.4763860369609856, "grad_norm": 2.0364739894866943, "learning_rate": 4.0083398803626606e-05, "loss": 0.6545, "step": 14740 }, { "epoch": 1.4773876896879852, "grad_norm": 3.0939629077911377, "learning_rate": 4.00708236471859e-05, "loss": 0.5798, "step": 14750 }, { "epoch": 1.4783893424149848, "grad_norm": 1.8895127773284912, "learning_rate": 4.005824249795225e-05, "loss": 0.5247, "step": 14760 }, { "epoch": 1.4793909951419844, "grad_norm": 2.274951934814453, "learning_rate": 4.004565536092845e-05, "loss": 0.5154, "step": 14770 }, { "epoch": 1.480392647868984, "grad_norm": 2.3646650314331055, "learning_rate": 4.0033062241119676e-05, "loss": 0.5363, "step": 14780 }, { "epoch": 1.4813943005959833, "grad_norm": 2.129894256591797, "learning_rate": 4.002046314353348e-05, "loss": 0.537, "step": 14790 }, { "epoch": 1.482395953322983, "grad_norm": 2.5647120475769043, "learning_rate": 4.000785807317981e-05, "loss": 0.5885, "step": 14800 }, { "epoch": 1.4833976060499825, "grad_norm": 1.9728856086730957, "learning_rate": 3.999524703507095e-05, "loss": 0.5233, "step": 14810 }, { "epoch": 1.484399258776982, "grad_norm": 2.2294158935546875, "learning_rate": 3.998263003422159e-05, "loss": 0.5812, "step": 14820 }, { "epoch": 1.4854009115039815, "grad_norm": 2.4784843921661377, "learning_rate": 3.997000707564877e-05, "loss": 0.5809, "step": 14830 }, { "epoch": 1.486402564230981, "grad_norm": 1.835519552230835, "learning_rate": 3.995737816437192e-05, "loss": 0.5403, "step": 14840 }, { "epoch": 1.4874042169579806, "grad_norm": 2.5397074222564697, "learning_rate": 3.994474330541282e-05, "loss": 0.6364, "step": 14850 }, { "epoch": 1.4884058696849802, "grad_norm": 2.101902961730957, "learning_rate": 3.9932102503795616e-05, "loss": 0.5697, "step": 14860 }, { "epoch": 1.4894075224119798, "grad_norm": 2.234866142272949, "learning_rate": 3.991945576454683e-05, "loss": 0.4622, "step": 14870 }, { "epoch": 1.4904091751389794, "grad_norm": 2.1409013271331787, "learning_rate": 3.990680309269534e-05, "loss": 0.5951, "step": 14880 }, { "epoch": 1.4914108278659788, "grad_norm": 1.8268654346466064, "learning_rate": 3.9894144493272376e-05, "loss": 0.6112, "step": 14890 }, { "epoch": 1.4924124805929784, "grad_norm": 2.455508232116699, "learning_rate": 3.988147997131152e-05, "loss": 0.6239, "step": 14900 }, { "epoch": 1.493414133319978, "grad_norm": 2.3702023029327393, "learning_rate": 3.986880953184874e-05, "loss": 0.5383, "step": 14910 }, { "epoch": 1.4944157860469776, "grad_norm": 2.136279821395874, "learning_rate": 3.985613317992231e-05, "loss": 0.6343, "step": 14920 }, { "epoch": 1.495417438773977, "grad_norm": 2.7083253860473633, "learning_rate": 3.98434509205729e-05, "loss": 0.5874, "step": 14930 }, { "epoch": 1.4964190915009765, "grad_norm": 2.658341407775879, "learning_rate": 3.9830762758843496e-05, "loss": 0.5574, "step": 14940 }, { "epoch": 1.497420744227976, "grad_norm": 1.8282076120376587, "learning_rate": 3.981806869977945e-05, "loss": 0.5252, "step": 14950 }, { "epoch": 1.4984223969549757, "grad_norm": 2.072915554046631, "learning_rate": 3.980536874842846e-05, "loss": 0.5425, "step": 14960 }, { "epoch": 1.4994240496819753, "grad_norm": 2.2089407444000244, "learning_rate": 3.979266290984055e-05, "loss": 0.5542, "step": 14970 }, { "epoch": 1.5004257024089749, "grad_norm": 1.9677778482437134, "learning_rate": 3.97799511890681e-05, "loss": 0.59, "step": 14980 }, { "epoch": 1.5014273551359745, "grad_norm": 1.9966034889221191, "learning_rate": 3.976723359116583e-05, "loss": 0.616, "step": 14990 }, { "epoch": 1.502429007862974, "grad_norm": 2.4626691341400146, "learning_rate": 3.975451012119078e-05, "loss": 0.5608, "step": 15000 }, { "epoch": 1.5034306605899734, "grad_norm": 1.9694550037384033, "learning_rate": 3.974178078420234e-05, "loss": 0.5136, "step": 15010 }, { "epoch": 1.504432313316973, "grad_norm": 2.206510066986084, "learning_rate": 3.9729045585262235e-05, "loss": 0.5868, "step": 15020 }, { "epoch": 1.5054339660439724, "grad_norm": 1.8147963285446167, "learning_rate": 3.9716304529434504e-05, "loss": 0.5395, "step": 15030 }, { "epoch": 1.506435618770972, "grad_norm": 2.563148021697998, "learning_rate": 3.970355762178555e-05, "loss": 0.5693, "step": 15040 }, { "epoch": 1.5074372714979716, "grad_norm": 3.0714714527130127, "learning_rate": 3.9690804867384046e-05, "loss": 0.6563, "step": 15050 }, { "epoch": 1.5084389242249712, "grad_norm": 2.1805124282836914, "learning_rate": 3.967804627130105e-05, "loss": 0.5471, "step": 15060 }, { "epoch": 1.5094405769519708, "grad_norm": 2.3275880813598633, "learning_rate": 3.9665281838609905e-05, "loss": 0.4971, "step": 15070 }, { "epoch": 1.5104422296789703, "grad_norm": 2.1147117614746094, "learning_rate": 3.9652511574386286e-05, "loss": 0.5394, "step": 15080 }, { "epoch": 1.51144388240597, "grad_norm": 2.3367505073547363, "learning_rate": 3.9639735483708195e-05, "loss": 0.5847, "step": 15090 }, { "epoch": 1.5124455351329695, "grad_norm": 2.4513463973999023, "learning_rate": 3.9626953571655926e-05, "loss": 0.5493, "step": 15100 }, { "epoch": 1.513447187859969, "grad_norm": 2.5679237842559814, "learning_rate": 3.961416584331212e-05, "loss": 0.6568, "step": 15110 }, { "epoch": 1.5144488405869685, "grad_norm": 2.4463155269622803, "learning_rate": 3.960137230376171e-05, "loss": 0.5491, "step": 15120 }, { "epoch": 1.515450493313968, "grad_norm": 3.207108497619629, "learning_rate": 3.958857295809195e-05, "loss": 0.6069, "step": 15130 }, { "epoch": 1.5164521460409675, "grad_norm": 2.2444918155670166, "learning_rate": 3.957576781139238e-05, "loss": 0.5662, "step": 15140 }, { "epoch": 1.517453798767967, "grad_norm": 3.2347822189331055, "learning_rate": 3.9562956868754884e-05, "loss": 0.6197, "step": 15150 }, { "epoch": 1.5184554514949666, "grad_norm": 2.22012996673584, "learning_rate": 3.955014013527363e-05, "loss": 0.6081, "step": 15160 }, { "epoch": 1.5194571042219662, "grad_norm": 2.368472099304199, "learning_rate": 3.9537317616045075e-05, "loss": 0.4841, "step": 15170 }, { "epoch": 1.5204587569489658, "grad_norm": 1.771808385848999, "learning_rate": 3.952448931616801e-05, "loss": 0.5171, "step": 15180 }, { "epoch": 1.5214604096759654, "grad_norm": 2.661496877670288, "learning_rate": 3.9511655240743494e-05, "loss": 0.5657, "step": 15190 }, { "epoch": 1.522462062402965, "grad_norm": 2.4911489486694336, "learning_rate": 3.949881539487489e-05, "loss": 0.5156, "step": 15200 }, { "epoch": 1.5234637151299646, "grad_norm": 2.6235358715057373, "learning_rate": 3.948596978366787e-05, "loss": 0.5028, "step": 15210 }, { "epoch": 1.524465367856964, "grad_norm": 2.052980661392212, "learning_rate": 3.9473118412230406e-05, "loss": 0.5424, "step": 15220 }, { "epoch": 1.5254670205839636, "grad_norm": 2.1379051208496094, "learning_rate": 3.9460261285672716e-05, "loss": 0.6054, "step": 15230 }, { "epoch": 1.5264686733109631, "grad_norm": 2.0419297218322754, "learning_rate": 3.944739840910733e-05, "loss": 0.5623, "step": 15240 }, { "epoch": 1.5274703260379625, "grad_norm": 1.9566359519958496, "learning_rate": 3.9434529787649096e-05, "loss": 0.579, "step": 15250 }, { "epoch": 1.528471978764962, "grad_norm": 2.9252963066101074, "learning_rate": 3.9421655426415094e-05, "loss": 0.5774, "step": 15260 }, { "epoch": 1.5294736314919617, "grad_norm": 1.8522799015045166, "learning_rate": 3.940877533052473e-05, "loss": 0.546, "step": 15270 }, { "epoch": 1.5304752842189613, "grad_norm": 1.8291130065917969, "learning_rate": 3.939588950509966e-05, "loss": 0.4684, "step": 15280 }, { "epoch": 1.5314769369459609, "grad_norm": 2.20719313621521, "learning_rate": 3.9382997955263826e-05, "loss": 0.6081, "step": 15290 }, { "epoch": 1.5324785896729605, "grad_norm": 2.3221805095672607, "learning_rate": 3.937010068614346e-05, "loss": 0.5657, "step": 15300 }, { "epoch": 1.53348024239996, "grad_norm": 2.302234172821045, "learning_rate": 3.935719770286706e-05, "loss": 0.6058, "step": 15310 }, { "epoch": 1.5344818951269596, "grad_norm": 1.9883896112442017, "learning_rate": 3.934428901056538e-05, "loss": 0.5818, "step": 15320 }, { "epoch": 1.535483547853959, "grad_norm": 2.019928216934204, "learning_rate": 3.9331374614371485e-05, "loss": 0.5142, "step": 15330 }, { "epoch": 1.5364852005809586, "grad_norm": 2.7909915447235107, "learning_rate": 3.931845451942065e-05, "loss": 0.5924, "step": 15340 }, { "epoch": 1.537486853307958, "grad_norm": 2.2816619873046875, "learning_rate": 3.930552873085047e-05, "loss": 0.4988, "step": 15350 }, { "epoch": 1.5384885060349576, "grad_norm": 2.1884818077087402, "learning_rate": 3.929259725380077e-05, "loss": 0.4728, "step": 15360 }, { "epoch": 1.5394901587619572, "grad_norm": 1.680711269378662, "learning_rate": 3.927966009341365e-05, "loss": 0.5373, "step": 15370 }, { "epoch": 1.5404918114889568, "grad_norm": 1.9891623258590698, "learning_rate": 3.9266717254833475e-05, "loss": 0.4977, "step": 15380 }, { "epoch": 1.5414934642159563, "grad_norm": 2.236722946166992, "learning_rate": 3.9253768743206867e-05, "loss": 0.5669, "step": 15390 }, { "epoch": 1.542495116942956, "grad_norm": 2.8090734481811523, "learning_rate": 3.924081456368268e-05, "loss": 0.6334, "step": 15400 }, { "epoch": 1.5434967696699555, "grad_norm": 2.2476439476013184, "learning_rate": 3.922785472141205e-05, "loss": 0.5412, "step": 15410 }, { "epoch": 1.5444984223969551, "grad_norm": 2.49711012840271, "learning_rate": 3.9214889221548365e-05, "loss": 0.5622, "step": 15420 }, { "epoch": 1.5455000751239545, "grad_norm": 2.0667357444763184, "learning_rate": 3.920191806924723e-05, "loss": 0.5434, "step": 15430 }, { "epoch": 1.546501727850954, "grad_norm": 2.0976450443267822, "learning_rate": 3.9188941269666544e-05, "loss": 0.5828, "step": 15440 }, { "epoch": 1.5475033805779537, "grad_norm": 2.21097469329834, "learning_rate": 3.9175958827966416e-05, "loss": 0.5162, "step": 15450 }, { "epoch": 1.548505033304953, "grad_norm": 2.5535941123962402, "learning_rate": 3.9162970749309207e-05, "loss": 0.6123, "step": 15460 }, { "epoch": 1.5495066860319526, "grad_norm": 2.2607061862945557, "learning_rate": 3.9149977038859534e-05, "loss": 0.6232, "step": 15470 }, { "epoch": 1.5505083387589522, "grad_norm": 1.9447360038757324, "learning_rate": 3.913697770178423e-05, "loss": 0.5005, "step": 15480 }, { "epoch": 1.5515099914859518, "grad_norm": 1.894853115081787, "learning_rate": 3.9123972743252394e-05, "loss": 0.5458, "step": 15490 }, { "epoch": 1.5525116442129514, "grad_norm": 1.628859281539917, "learning_rate": 3.9110962168435315e-05, "loss": 0.5476, "step": 15500 }, { "epoch": 1.553513296939951, "grad_norm": 2.6020190715789795, "learning_rate": 3.9097945982506584e-05, "loss": 0.5331, "step": 15510 }, { "epoch": 1.5545149496669506, "grad_norm": 2.6441216468811035, "learning_rate": 3.908492419064196e-05, "loss": 0.578, "step": 15520 }, { "epoch": 1.5555166023939502, "grad_norm": 1.7780492305755615, "learning_rate": 3.907189679801945e-05, "loss": 0.501, "step": 15530 }, { "epoch": 1.5565182551209495, "grad_norm": 2.1867527961730957, "learning_rate": 3.90588638098193e-05, "loss": 0.5687, "step": 15540 }, { "epoch": 1.5575199078479491, "grad_norm": 2.3934149742126465, "learning_rate": 3.904582523122398e-05, "loss": 0.595, "step": 15550 }, { "epoch": 1.5585215605749485, "grad_norm": 2.72482967376709, "learning_rate": 3.9032781067418176e-05, "loss": 0.5956, "step": 15560 }, { "epoch": 1.559523213301948, "grad_norm": 2.589207410812378, "learning_rate": 3.9019731323588785e-05, "loss": 0.5599, "step": 15570 }, { "epoch": 1.5605248660289477, "grad_norm": 2.120452404022217, "learning_rate": 3.900667600492494e-05, "loss": 0.5554, "step": 15580 }, { "epoch": 1.5615265187559473, "grad_norm": 2.300694704055786, "learning_rate": 3.8993615116617985e-05, "loss": 0.5926, "step": 15590 }, { "epoch": 1.5625281714829469, "grad_norm": 2.3215324878692627, "learning_rate": 3.8980548663861485e-05, "loss": 0.5243, "step": 15600 }, { "epoch": 1.5635298242099465, "grad_norm": 2.2662432193756104, "learning_rate": 3.8967476651851196e-05, "loss": 0.5805, "step": 15610 }, { "epoch": 1.564531476936946, "grad_norm": 2.3407816886901855, "learning_rate": 3.895439908578511e-05, "loss": 0.5136, "step": 15620 }, { "epoch": 1.5655331296639456, "grad_norm": 2.640747547149658, "learning_rate": 3.894131597086341e-05, "loss": 0.5318, "step": 15630 }, { "epoch": 1.566534782390945, "grad_norm": 2.426098585128784, "learning_rate": 3.89282273122885e-05, "loss": 0.5826, "step": 15640 }, { "epoch": 1.5675364351179446, "grad_norm": 2.3364737033843994, "learning_rate": 3.891513311526498e-05, "loss": 0.4968, "step": 15650 }, { "epoch": 1.5685380878449442, "grad_norm": 2.151834011077881, "learning_rate": 3.890203338499965e-05, "loss": 0.5438, "step": 15660 }, { "epoch": 1.5695397405719436, "grad_norm": 2.491666555404663, "learning_rate": 3.8888928126701515e-05, "loss": 0.5787, "step": 15670 }, { "epoch": 1.5705413932989432, "grad_norm": 2.0937247276306152, "learning_rate": 3.887581734558177e-05, "loss": 0.5191, "step": 15680 }, { "epoch": 1.5715430460259427, "grad_norm": 2.5927858352661133, "learning_rate": 3.886270104685382e-05, "loss": 0.5813, "step": 15690 }, { "epoch": 1.5725446987529423, "grad_norm": 1.6065778732299805, "learning_rate": 3.884957923573325e-05, "loss": 0.5401, "step": 15700 }, { "epoch": 1.573546351479942, "grad_norm": 3.2194879055023193, "learning_rate": 3.883645191743786e-05, "loss": 0.5814, "step": 15710 }, { "epoch": 1.5745480042069415, "grad_norm": 2.245382308959961, "learning_rate": 3.88233190971876e-05, "loss": 0.586, "step": 15720 }, { "epoch": 1.5755496569339411, "grad_norm": 2.3317959308624268, "learning_rate": 3.8810180780204645e-05, "loss": 0.5584, "step": 15730 }, { "epoch": 1.5765513096609407, "grad_norm": 2.8339760303497314, "learning_rate": 3.8797036971713344e-05, "loss": 0.5879, "step": 15740 }, { "epoch": 1.57755296238794, "grad_norm": 2.1983373165130615, "learning_rate": 3.8783887676940225e-05, "loss": 0.6056, "step": 15750 }, { "epoch": 1.5785546151149397, "grad_norm": 2.6038060188293457, "learning_rate": 3.8770732901113994e-05, "loss": 0.5227, "step": 15760 }, { "epoch": 1.5795562678419393, "grad_norm": 3.548153877258301, "learning_rate": 3.875757264946555e-05, "loss": 0.5142, "step": 15770 }, { "epoch": 1.5805579205689386, "grad_norm": 2.2880027294158936, "learning_rate": 3.874440692722796e-05, "loss": 0.5454, "step": 15780 }, { "epoch": 1.5815595732959382, "grad_norm": 2.094909191131592, "learning_rate": 3.8731235739636476e-05, "loss": 0.5399, "step": 15790 }, { "epoch": 1.5825612260229378, "grad_norm": 2.4892055988311768, "learning_rate": 3.87180590919285e-05, "loss": 0.5292, "step": 15800 }, { "epoch": 1.5835628787499374, "grad_norm": 2.3183560371398926, "learning_rate": 3.870487698934363e-05, "loss": 0.5417, "step": 15810 }, { "epoch": 1.584564531476937, "grad_norm": 2.3660850524902344, "learning_rate": 3.869168943712362e-05, "loss": 0.5847, "step": 15820 }, { "epoch": 1.5855661842039366, "grad_norm": 2.3487000465393066, "learning_rate": 3.8678496440512415e-05, "loss": 0.5391, "step": 15830 }, { "epoch": 1.5865678369309362, "grad_norm": 1.8460687398910522, "learning_rate": 3.8665298004756075e-05, "loss": 0.5236, "step": 15840 }, { "epoch": 1.5875694896579355, "grad_norm": 2.310218334197998, "learning_rate": 3.8652094135102865e-05, "loss": 0.5462, "step": 15850 }, { "epoch": 1.5885711423849351, "grad_norm": 2.01802659034729, "learning_rate": 3.8638884836803205e-05, "loss": 0.5358, "step": 15860 }, { "epoch": 1.5895727951119347, "grad_norm": 1.9128429889678955, "learning_rate": 3.8625670115109667e-05, "loss": 0.6124, "step": 15870 }, { "epoch": 1.590574447838934, "grad_norm": 2.289938449859619, "learning_rate": 3.8612449975276965e-05, "loss": 0.5916, "step": 15880 }, { "epoch": 1.5915761005659337, "grad_norm": 3.483588457107544, "learning_rate": 3.8599224422561997e-05, "loss": 0.5341, "step": 15890 }, { "epoch": 1.5925777532929333, "grad_norm": 2.2982561588287354, "learning_rate": 3.858599346222379e-05, "loss": 0.5238, "step": 15900 }, { "epoch": 1.5935794060199329, "grad_norm": 3.378920555114746, "learning_rate": 3.857275709952354e-05, "loss": 0.5332, "step": 15910 }, { "epoch": 1.5945810587469325, "grad_norm": 1.8684322834014893, "learning_rate": 3.855951533972457e-05, "loss": 0.5602, "step": 15920 }, { "epoch": 1.595582711473932, "grad_norm": 2.5684077739715576, "learning_rate": 3.854626818809237e-05, "loss": 0.6066, "step": 15930 }, { "epoch": 1.5965843642009316, "grad_norm": 2.02518630027771, "learning_rate": 3.853301564989455e-05, "loss": 0.5548, "step": 15940 }, { "epoch": 1.5975860169279312, "grad_norm": 2.17681884765625, "learning_rate": 3.8519757730400894e-05, "loss": 0.5176, "step": 15950 }, { "epoch": 1.5985876696549306, "grad_norm": 2.2103607654571533, "learning_rate": 3.85064944348833e-05, "loss": 0.5658, "step": 15960 }, { "epoch": 1.5995893223819302, "grad_norm": 1.7236950397491455, "learning_rate": 3.849322576861582e-05, "loss": 0.5217, "step": 15970 }, { "epoch": 1.6005909751089298, "grad_norm": 1.8692692518234253, "learning_rate": 3.847995173687461e-05, "loss": 0.6032, "step": 15980 }, { "epoch": 1.6015926278359292, "grad_norm": 2.6671016216278076, "learning_rate": 3.8466672344938005e-05, "loss": 0.6187, "step": 15990 }, { "epoch": 1.6025942805629287, "grad_norm": 2.8001585006713867, "learning_rate": 3.845338759808644e-05, "loss": 0.5646, "step": 16000 }, { "epoch": 1.6035959332899283, "grad_norm": 2.768728733062744, "learning_rate": 3.844009750160249e-05, "loss": 0.6015, "step": 16010 }, { "epoch": 1.604597586016928, "grad_norm": 3.4323201179504395, "learning_rate": 3.842680206077086e-05, "loss": 0.5414, "step": 16020 }, { "epoch": 1.6055992387439275, "grad_norm": 2.223552942276001, "learning_rate": 3.841350128087837e-05, "loss": 0.5509, "step": 16030 }, { "epoch": 1.606600891470927, "grad_norm": 2.1531848907470703, "learning_rate": 3.840019516721398e-05, "loss": 0.6104, "step": 16040 }, { "epoch": 1.6076025441979267, "grad_norm": 2.558938503265381, "learning_rate": 3.8386883725068745e-05, "loss": 0.5257, "step": 16050 }, { "epoch": 1.6086041969249263, "grad_norm": 2.4752094745635986, "learning_rate": 3.837356695973586e-05, "loss": 0.5968, "step": 16060 }, { "epoch": 1.6096058496519257, "grad_norm": 2.3830080032348633, "learning_rate": 3.836024487651064e-05, "loss": 0.5585, "step": 16070 }, { "epoch": 1.6106075023789252, "grad_norm": 2.419825315475464, "learning_rate": 3.834691748069049e-05, "loss": 0.5552, "step": 16080 }, { "epoch": 1.6116091551059246, "grad_norm": 2.2571637630462646, "learning_rate": 3.833358477757496e-05, "loss": 0.5709, "step": 16090 }, { "epoch": 1.6126108078329242, "grad_norm": 2.4724700450897217, "learning_rate": 3.8320246772465674e-05, "loss": 0.553, "step": 16100 }, { "epoch": 1.6136124605599238, "grad_norm": 2.4371488094329834, "learning_rate": 3.8306903470666385e-05, "loss": 0.5119, "step": 16110 }, { "epoch": 1.6146141132869234, "grad_norm": 2.0440852642059326, "learning_rate": 3.829355487748297e-05, "loss": 0.6072, "step": 16120 }, { "epoch": 1.615615766013923, "grad_norm": 1.7289458513259888, "learning_rate": 3.828020099822338e-05, "loss": 0.5656, "step": 16130 }, { "epoch": 1.6166174187409226, "grad_norm": 2.366450071334839, "learning_rate": 3.826684183819768e-05, "loss": 0.5754, "step": 16140 }, { "epoch": 1.6176190714679222, "grad_norm": 1.9177955389022827, "learning_rate": 3.825347740271802e-05, "loss": 0.5357, "step": 16150 }, { "epoch": 1.6186207241949218, "grad_norm": 2.2635581493377686, "learning_rate": 3.824010769709868e-05, "loss": 0.5734, "step": 16160 }, { "epoch": 1.6196223769219211, "grad_norm": 2.226444959640503, "learning_rate": 3.8226732726656005e-05, "loss": 0.5329, "step": 16170 }, { "epoch": 1.6206240296489207, "grad_norm": 1.7379987239837646, "learning_rate": 3.821335249670845e-05, "loss": 0.5442, "step": 16180 }, { "epoch": 1.6216256823759203, "grad_norm": 2.6636769771575928, "learning_rate": 3.8199967012576566e-05, "loss": 0.5654, "step": 16190 }, { "epoch": 1.6226273351029197, "grad_norm": 2.491895914077759, "learning_rate": 3.818657627958296e-05, "loss": 0.5794, "step": 16200 }, { "epoch": 1.6236289878299193, "grad_norm": 2.4376542568206787, "learning_rate": 3.817318030305238e-05, "loss": 0.5393, "step": 16210 }, { "epoch": 1.6246306405569189, "grad_norm": 2.03060245513916, "learning_rate": 3.815977908831161e-05, "loss": 0.5238, "step": 16220 }, { "epoch": 1.6256322932839185, "grad_norm": 1.902113914489746, "learning_rate": 3.8146372640689536e-05, "loss": 0.5333, "step": 16230 }, { "epoch": 1.626633946010918, "grad_norm": 2.258549928665161, "learning_rate": 3.8132960965517135e-05, "loss": 0.5551, "step": 16240 }, { "epoch": 1.6276355987379176, "grad_norm": 1.7955327033996582, "learning_rate": 3.811954406812744e-05, "loss": 0.5322, "step": 16250 }, { "epoch": 1.6286372514649172, "grad_norm": 1.9605530500411987, "learning_rate": 3.810612195385558e-05, "loss": 0.5111, "step": 16260 }, { "epoch": 1.6296389041919168, "grad_norm": 2.7651565074920654, "learning_rate": 3.8092694628038764e-05, "loss": 0.472, "step": 16270 }, { "epoch": 1.6306405569189162, "grad_norm": 2.146113395690918, "learning_rate": 3.807926209601624e-05, "loss": 0.6197, "step": 16280 }, { "epoch": 1.6316422096459158, "grad_norm": 2.629794120788574, "learning_rate": 3.806582436312936e-05, "loss": 0.5674, "step": 16290 }, { "epoch": 1.6326438623729151, "grad_norm": 1.815111756324768, "learning_rate": 3.805238143472154e-05, "loss": 0.5137, "step": 16300 }, { "epoch": 1.6336455150999147, "grad_norm": 2.728011131286621, "learning_rate": 3.8038933316138225e-05, "loss": 0.5547, "step": 16310 }, { "epoch": 1.6346471678269143, "grad_norm": 1.9159232378005981, "learning_rate": 3.802548001272698e-05, "loss": 0.5681, "step": 16320 }, { "epoch": 1.635648820553914, "grad_norm": 2.153987407684326, "learning_rate": 3.801202152983738e-05, "loss": 0.5581, "step": 16330 }, { "epoch": 1.6366504732809135, "grad_norm": 2.767010450363159, "learning_rate": 3.7998557872821104e-05, "loss": 0.5055, "step": 16340 }, { "epoch": 1.637652126007913, "grad_norm": 2.1452322006225586, "learning_rate": 3.798508904703186e-05, "loss": 0.5336, "step": 16350 }, { "epoch": 1.6386537787349127, "grad_norm": 1.8051600456237793, "learning_rate": 3.797161505782543e-05, "loss": 0.566, "step": 16360 }, { "epoch": 1.6396554314619123, "grad_norm": 2.4228758811950684, "learning_rate": 3.795813591055961e-05, "loss": 0.5281, "step": 16370 }, { "epoch": 1.6406570841889117, "grad_norm": 2.133131742477417, "learning_rate": 3.794465161059431e-05, "loss": 0.5489, "step": 16380 }, { "epoch": 1.6416587369159112, "grad_norm": 2.345156192779541, "learning_rate": 3.793116216329143e-05, "loss": 0.5207, "step": 16390 }, { "epoch": 1.6426603896429108, "grad_norm": 2.2094056606292725, "learning_rate": 3.791766757401495e-05, "loss": 0.5303, "step": 16400 }, { "epoch": 1.6436620423699102, "grad_norm": 2.3264756202697754, "learning_rate": 3.790416784813088e-05, "loss": 0.5847, "step": 16410 }, { "epoch": 1.6446636950969098, "grad_norm": 2.0210537910461426, "learning_rate": 3.7890662991007294e-05, "loss": 0.6641, "step": 16420 }, { "epoch": 1.6456653478239094, "grad_norm": 2.2783915996551514, "learning_rate": 3.7877153008014275e-05, "loss": 0.6113, "step": 16430 }, { "epoch": 1.646667000550909, "grad_norm": 2.1935906410217285, "learning_rate": 3.7863637904523956e-05, "loss": 0.5113, "step": 16440 }, { "epoch": 1.6476686532779086, "grad_norm": 2.478006601333618, "learning_rate": 3.7850117685910535e-05, "loss": 0.5166, "step": 16450 }, { "epoch": 1.6486703060049082, "grad_norm": 2.1115570068359375, "learning_rate": 3.783659235755019e-05, "loss": 0.5136, "step": 16460 }, { "epoch": 1.6496719587319078, "grad_norm": 2.320053815841675, "learning_rate": 3.782306192482119e-05, "loss": 0.5142, "step": 16470 }, { "epoch": 1.6506736114589073, "grad_norm": 3.074125051498413, "learning_rate": 3.7809526393103785e-05, "loss": 0.5026, "step": 16480 }, { "epoch": 1.6516752641859067, "grad_norm": 2.644097328186035, "learning_rate": 3.779598576778026e-05, "loss": 0.5428, "step": 16490 }, { "epoch": 1.6526769169129063, "grad_norm": 2.510568857192993, "learning_rate": 3.7782440054234966e-05, "loss": 0.5376, "step": 16500 }, { "epoch": 1.653678569639906, "grad_norm": 1.993364691734314, "learning_rate": 3.7768889257854224e-05, "loss": 0.5449, "step": 16510 }, { "epoch": 1.6546802223669053, "grad_norm": 2.8113162517547607, "learning_rate": 3.775533338402641e-05, "loss": 0.5261, "step": 16520 }, { "epoch": 1.6556818750939049, "grad_norm": 2.3818821907043457, "learning_rate": 3.7741772438141916e-05, "loss": 0.6036, "step": 16530 }, { "epoch": 1.6566835278209044, "grad_norm": 2.2668466567993164, "learning_rate": 3.7728206425593126e-05, "loss": 0.5026, "step": 16540 }, { "epoch": 1.657685180547904, "grad_norm": 2.132610321044922, "learning_rate": 3.771463535177447e-05, "loss": 0.5779, "step": 16550 }, { "epoch": 1.6586868332749036, "grad_norm": 2.991370439529419, "learning_rate": 3.770105922208239e-05, "loss": 0.6101, "step": 16560 }, { "epoch": 1.6596884860019032, "grad_norm": 2.392225742340088, "learning_rate": 3.768747804191529e-05, "loss": 0.5587, "step": 16570 }, { "epoch": 1.6606901387289028, "grad_norm": 2.6654226779937744, "learning_rate": 3.767389181667365e-05, "loss": 0.4945, "step": 16580 }, { "epoch": 1.6616917914559024, "grad_norm": 2.315359115600586, "learning_rate": 3.766030055175991e-05, "loss": 0.6415, "step": 16590 }, { "epoch": 1.6626934441829018, "grad_norm": 3.1376960277557373, "learning_rate": 3.764670425257853e-05, "loss": 0.5494, "step": 16600 }, { "epoch": 1.6636950969099014, "grad_norm": 2.255483865737915, "learning_rate": 3.763310292453597e-05, "loss": 0.5318, "step": 16610 }, { "epoch": 1.6646967496369007, "grad_norm": 1.7115904092788696, "learning_rate": 3.761949657304068e-05, "loss": 0.5176, "step": 16620 }, { "epoch": 1.6656984023639003, "grad_norm": 1.925130009651184, "learning_rate": 3.760588520350315e-05, "loss": 0.5681, "step": 16630 }, { "epoch": 1.6667000550909, "grad_norm": 3.0387139320373535, "learning_rate": 3.75922688213358e-05, "loss": 0.4607, "step": 16640 }, { "epoch": 1.6677017078178995, "grad_norm": 1.6796802282333374, "learning_rate": 3.7578647431953086e-05, "loss": 0.6343, "step": 16650 }, { "epoch": 1.668703360544899, "grad_norm": 2.335080146789551, "learning_rate": 3.756502104077145e-05, "loss": 0.5031, "step": 16660 }, { "epoch": 1.6697050132718987, "grad_norm": 1.8291679620742798, "learning_rate": 3.75513896532093e-05, "loss": 0.5351, "step": 16670 }, { "epoch": 1.6707066659988983, "grad_norm": 2.4488725662231445, "learning_rate": 3.753775327468708e-05, "loss": 0.5344, "step": 16680 }, { "epoch": 1.6717083187258979, "grad_norm": 2.563654661178589, "learning_rate": 3.7524111910627157e-05, "loss": 0.6013, "step": 16690 }, { "epoch": 1.6727099714528972, "grad_norm": 2.365469455718994, "learning_rate": 3.7510465566453924e-05, "loss": 0.4762, "step": 16700 }, { "epoch": 1.6737116241798968, "grad_norm": 2.369333028793335, "learning_rate": 3.749681424759374e-05, "loss": 0.6089, "step": 16710 }, { "epoch": 1.6747132769068964, "grad_norm": 3.0419299602508545, "learning_rate": 3.748315795947495e-05, "loss": 0.5973, "step": 16720 }, { "epoch": 1.6757149296338958, "grad_norm": 2.051492929458618, "learning_rate": 3.7469496707527854e-05, "loss": 0.5512, "step": 16730 }, { "epoch": 1.6767165823608954, "grad_norm": 2.6813406944274902, "learning_rate": 3.745583049718475e-05, "loss": 0.5789, "step": 16740 }, { "epoch": 1.677718235087895, "grad_norm": 1.9882440567016602, "learning_rate": 3.74421593338799e-05, "loss": 0.5064, "step": 16750 }, { "epoch": 1.6787198878148946, "grad_norm": 2.8968048095703125, "learning_rate": 3.742848322304952e-05, "loss": 0.5373, "step": 16760 }, { "epoch": 1.6797215405418942, "grad_norm": 2.5871849060058594, "learning_rate": 3.741480217013182e-05, "loss": 0.5037, "step": 16770 }, { "epoch": 1.6807231932688937, "grad_norm": 2.5383193492889404, "learning_rate": 3.7401116180566954e-05, "loss": 0.4799, "step": 16780 }, { "epoch": 1.6817248459958933, "grad_norm": 2.1125566959381104, "learning_rate": 3.738742525979705e-05, "loss": 0.469, "step": 16790 }, { "epoch": 1.682726498722893, "grad_norm": 2.907052755355835, "learning_rate": 3.737372941326619e-05, "loss": 0.6347, "step": 16800 }, { "epoch": 1.6837281514498923, "grad_norm": 2.6465771198272705, "learning_rate": 3.736002864642042e-05, "loss": 0.5437, "step": 16810 }, { "epoch": 1.6847298041768919, "grad_norm": 2.14072322845459, "learning_rate": 3.7346322964707744e-05, "loss": 0.4944, "step": 16820 }, { "epoch": 1.6857314569038913, "grad_norm": 2.4284651279449463, "learning_rate": 3.733261237357812e-05, "loss": 0.5874, "step": 16830 }, { "epoch": 1.6867331096308908, "grad_norm": 2.144977569580078, "learning_rate": 3.731889687848344e-05, "loss": 0.5592, "step": 16840 }, { "epoch": 1.6877347623578904, "grad_norm": 1.864293098449707, "learning_rate": 3.730517648487758e-05, "loss": 0.5409, "step": 16850 }, { "epoch": 1.68873641508489, "grad_norm": 2.6154680252075195, "learning_rate": 3.7291451198216334e-05, "loss": 0.5337, "step": 16860 }, { "epoch": 1.6897380678118896, "grad_norm": 3.94486403465271, "learning_rate": 3.727772102395745e-05, "loss": 0.4953, "step": 16870 }, { "epoch": 1.6907397205388892, "grad_norm": 2.5961368083953857, "learning_rate": 3.726398596756063e-05, "loss": 0.5295, "step": 16880 }, { "epoch": 1.6917413732658888, "grad_norm": 2.7672832012176514, "learning_rate": 3.725024603448751e-05, "loss": 0.5942, "step": 16890 }, { "epoch": 1.6927430259928884, "grad_norm": 3.2715678215026855, "learning_rate": 3.723650123020166e-05, "loss": 0.6036, "step": 16900 }, { "epoch": 1.6937446787198878, "grad_norm": 2.5985188484191895, "learning_rate": 3.72227515601686e-05, "loss": 0.5415, "step": 16910 }, { "epoch": 1.6947463314468874, "grad_norm": 2.263279676437378, "learning_rate": 3.7208997029855764e-05, "loss": 0.5924, "step": 16920 }, { "epoch": 1.695747984173887, "grad_norm": 2.0555026531219482, "learning_rate": 3.7195237644732545e-05, "loss": 0.5864, "step": 16930 }, { "epoch": 1.6967496369008863, "grad_norm": 2.1603214740753174, "learning_rate": 3.718147341027024e-05, "loss": 0.5368, "step": 16940 }, { "epoch": 1.697751289627886, "grad_norm": 3.1869144439697266, "learning_rate": 3.71677043319421e-05, "loss": 0.6791, "step": 16950 }, { "epoch": 1.6987529423548855, "grad_norm": 2.106552839279175, "learning_rate": 3.715393041522328e-05, "loss": 0.5239, "step": 16960 }, { "epoch": 1.699754595081885, "grad_norm": 2.6369104385375977, "learning_rate": 3.714015166559087e-05, "loss": 0.5722, "step": 16970 }, { "epoch": 1.7007562478088847, "grad_norm": 2.095924139022827, "learning_rate": 3.7126368088523884e-05, "loss": 0.597, "step": 16980 }, { "epoch": 1.7017579005358843, "grad_norm": 2.8147194385528564, "learning_rate": 3.711257968950325e-05, "loss": 0.5293, "step": 16990 }, { "epoch": 1.7027595532628839, "grad_norm": 2.2241532802581787, "learning_rate": 3.709878647401181e-05, "loss": 0.5732, "step": 17000 }, { "epoch": 1.7037612059898835, "grad_norm": 2.0934462547302246, "learning_rate": 3.708498844753433e-05, "loss": 0.5241, "step": 17010 }, { "epoch": 1.7047628587168828, "grad_norm": 1.8266611099243164, "learning_rate": 3.707118561555748e-05, "loss": 0.4946, "step": 17020 }, { "epoch": 1.7057645114438824, "grad_norm": 1.8321797847747803, "learning_rate": 3.705737798356985e-05, "loss": 0.6098, "step": 17030 }, { "epoch": 1.706766164170882, "grad_norm": 2.4191246032714844, "learning_rate": 3.704356555706195e-05, "loss": 0.5116, "step": 17040 }, { "epoch": 1.7077678168978814, "grad_norm": 2.142313003540039, "learning_rate": 3.702974834152616e-05, "loss": 0.5581, "step": 17050 }, { "epoch": 1.708769469624881, "grad_norm": 2.1364362239837646, "learning_rate": 3.70159263424568e-05, "loss": 0.4971, "step": 17060 }, { "epoch": 1.7097711223518806, "grad_norm": 2.3862712383270264, "learning_rate": 3.7002099565350053e-05, "loss": 0.5418, "step": 17070 }, { "epoch": 1.7107727750788801, "grad_norm": 2.108258008956909, "learning_rate": 3.698826801570406e-05, "loss": 0.5097, "step": 17080 }, { "epoch": 1.7117744278058797, "grad_norm": 2.0370752811431885, "learning_rate": 3.6974431699018806e-05, "loss": 0.5491, "step": 17090 }, { "epoch": 1.7127760805328793, "grad_norm": 1.8179024457931519, "learning_rate": 3.69605906207962e-05, "loss": 0.5095, "step": 17100 }, { "epoch": 1.713777733259879, "grad_norm": 2.070310592651367, "learning_rate": 3.694674478654003e-05, "loss": 0.579, "step": 17110 }, { "epoch": 1.7147793859868785, "grad_norm": 2.2030398845672607, "learning_rate": 3.693289420175599e-05, "loss": 0.5857, "step": 17120 }, { "epoch": 1.7157810387138779, "grad_norm": 2.334590196609497, "learning_rate": 3.691903887195165e-05, "loss": 0.5118, "step": 17130 }, { "epoch": 1.7167826914408775, "grad_norm": 2.140634298324585, "learning_rate": 3.690517880263647e-05, "loss": 0.5775, "step": 17140 }, { "epoch": 1.7177843441678768, "grad_norm": 1.9049276113510132, "learning_rate": 3.68913139993218e-05, "loss": 0.5149, "step": 17150 }, { "epoch": 1.7187859968948764, "grad_norm": 2.7127888202667236, "learning_rate": 3.687744446752086e-05, "loss": 0.5779, "step": 17160 }, { "epoch": 1.719787649621876, "grad_norm": 2.346618413925171, "learning_rate": 3.686357021274877e-05, "loss": 0.5229, "step": 17170 }, { "epoch": 1.7207893023488756, "grad_norm": 2.411379337310791, "learning_rate": 3.684969124052251e-05, "loss": 0.5378, "step": 17180 }, { "epoch": 1.7217909550758752, "grad_norm": 2.139772891998291, "learning_rate": 3.683580755636094e-05, "loss": 0.5433, "step": 17190 }, { "epoch": 1.7227926078028748, "grad_norm": 2.1813013553619385, "learning_rate": 3.682191916578481e-05, "loss": 0.5352, "step": 17200 }, { "epoch": 1.7237942605298744, "grad_norm": 2.3435051441192627, "learning_rate": 3.680802607431673e-05, "loss": 0.5517, "step": 17210 }, { "epoch": 1.724795913256874, "grad_norm": 2.307100534439087, "learning_rate": 3.6794128287481136e-05, "loss": 0.5181, "step": 17220 }, { "epoch": 1.7257975659838733, "grad_norm": 1.8775631189346313, "learning_rate": 3.6780225810804426e-05, "loss": 0.5047, "step": 17230 }, { "epoch": 1.726799218710873, "grad_norm": 2.518465042114258, "learning_rate": 3.676631864981478e-05, "loss": 0.4953, "step": 17240 }, { "epoch": 1.7278008714378725, "grad_norm": 2.4192512035369873, "learning_rate": 3.675240681004227e-05, "loss": 0.5995, "step": 17250 }, { "epoch": 1.728802524164872, "grad_norm": 2.5736687183380127, "learning_rate": 3.673849029701883e-05, "loss": 0.6249, "step": 17260 }, { "epoch": 1.7298041768918715, "grad_norm": 1.8964473009109497, "learning_rate": 3.672456911627826e-05, "loss": 0.6039, "step": 17270 }, { "epoch": 1.730805829618871, "grad_norm": 1.9748671054840088, "learning_rate": 3.6710643273356206e-05, "loss": 0.5387, "step": 17280 }, { "epoch": 1.7318074823458707, "grad_norm": 2.3679747581481934, "learning_rate": 3.669671277379016e-05, "loss": 0.5977, "step": 17290 }, { "epoch": 1.7328091350728703, "grad_norm": 2.5027241706848145, "learning_rate": 3.6682777623119474e-05, "loss": 0.5304, "step": 17300 }, { "epoch": 1.7338107877998699, "grad_norm": 1.9013116359710693, "learning_rate": 3.666883782688535e-05, "loss": 0.5139, "step": 17310 }, { "epoch": 1.7348124405268694, "grad_norm": 2.220552682876587, "learning_rate": 3.665489339063085e-05, "loss": 0.5624, "step": 17320 }, { "epoch": 1.735814093253869, "grad_norm": 1.7051708698272705, "learning_rate": 3.664094431990085e-05, "loss": 0.4726, "step": 17330 }, { "epoch": 1.7368157459808684, "grad_norm": 1.9945650100708008, "learning_rate": 3.662699062024209e-05, "loss": 0.5907, "step": 17340 }, { "epoch": 1.737817398707868, "grad_norm": 2.306936502456665, "learning_rate": 3.661303229720316e-05, "loss": 0.5114, "step": 17350 }, { "epoch": 1.7388190514348674, "grad_norm": 2.131807565689087, "learning_rate": 3.659906935633446e-05, "loss": 0.5478, "step": 17360 }, { "epoch": 1.739820704161867, "grad_norm": 2.530008554458618, "learning_rate": 3.658510180318826e-05, "loss": 0.558, "step": 17370 }, { "epoch": 1.7408223568888666, "grad_norm": 1.6221373081207275, "learning_rate": 3.657112964331862e-05, "loss": 0.5436, "step": 17380 }, { "epoch": 1.7418240096158661, "grad_norm": 2.6965410709381104, "learning_rate": 3.6557152882281497e-05, "loss": 0.5593, "step": 17390 }, { "epoch": 1.7428256623428657, "grad_norm": 2.156054735183716, "learning_rate": 3.65431715256346e-05, "loss": 0.522, "step": 17400 }, { "epoch": 1.7438273150698653, "grad_norm": 2.559577465057373, "learning_rate": 3.652918557893753e-05, "loss": 0.5416, "step": 17410 }, { "epoch": 1.744828967796865, "grad_norm": 2.3171913623809814, "learning_rate": 3.651519504775167e-05, "loss": 0.5473, "step": 17420 }, { "epoch": 1.7458306205238645, "grad_norm": 2.0408565998077393, "learning_rate": 3.650119993764025e-05, "loss": 0.4801, "step": 17430 }, { "epoch": 1.7468322732508639, "grad_norm": 2.4444737434387207, "learning_rate": 3.648720025416832e-05, "loss": 0.5292, "step": 17440 }, { "epoch": 1.7478339259778635, "grad_norm": 3.833693027496338, "learning_rate": 3.647319600290273e-05, "loss": 0.5295, "step": 17450 }, { "epoch": 1.748835578704863, "grad_norm": 2.1103272438049316, "learning_rate": 3.6459187189412175e-05, "loss": 0.5395, "step": 17460 }, { "epoch": 1.7498372314318624, "grad_norm": 2.4942052364349365, "learning_rate": 3.6445173819267133e-05, "loss": 0.5076, "step": 17470 }, { "epoch": 1.750838884158862, "grad_norm": 2.432206869125366, "learning_rate": 3.643115589803992e-05, "loss": 0.5246, "step": 17480 }, { "epoch": 1.7518405368858616, "grad_norm": 2.308864116668701, "learning_rate": 3.641713343130465e-05, "loss": 0.6066, "step": 17490 }, { "epoch": 1.7528421896128612, "grad_norm": 2.465198516845703, "learning_rate": 3.640310642463723e-05, "loss": 0.5812, "step": 17500 }, { "epoch": 1.7538438423398608, "grad_norm": 2.269287347793579, "learning_rate": 3.6389074883615395e-05, "loss": 0.6508, "step": 17510 }, { "epoch": 1.7548454950668604, "grad_norm": 2.149580717086792, "learning_rate": 3.637503881381869e-05, "loss": 0.5685, "step": 17520 }, { "epoch": 1.75584714779386, "grad_norm": 2.5178351402282715, "learning_rate": 3.6360998220828436e-05, "loss": 0.5466, "step": 17530 }, { "epoch": 1.7568488005208596, "grad_norm": 2.1612465381622314, "learning_rate": 3.634695311022775e-05, "loss": 0.4816, "step": 17540 }, { "epoch": 1.757850453247859, "grad_norm": 2.286977529525757, "learning_rate": 3.6332903487601584e-05, "loss": 0.5219, "step": 17550 }, { "epoch": 1.7588521059748585, "grad_norm": 1.9696747064590454, "learning_rate": 3.6318849358536635e-05, "loss": 0.4647, "step": 17560 }, { "epoch": 1.7598537587018581, "grad_norm": 2.1928446292877197, "learning_rate": 3.630479072862143e-05, "loss": 0.5435, "step": 17570 }, { "epoch": 1.7608554114288575, "grad_norm": 2.580597162246704, "learning_rate": 3.629072760344627e-05, "loss": 0.5458, "step": 17580 }, { "epoch": 1.761857064155857, "grad_norm": 1.8262009620666504, "learning_rate": 3.6276659988603234e-05, "loss": 0.5767, "step": 17590 }, { "epoch": 1.7628587168828567, "grad_norm": 2.46189284324646, "learning_rate": 3.6262587889686205e-05, "loss": 0.5401, "step": 17600 }, { "epoch": 1.7638603696098563, "grad_norm": 3.054136037826538, "learning_rate": 3.624851131229084e-05, "loss": 0.4778, "step": 17610 }, { "epoch": 1.7648620223368559, "grad_norm": 2.439973831176758, "learning_rate": 3.6234430262014594e-05, "loss": 0.5884, "step": 17620 }, { "epoch": 1.7658636750638554, "grad_norm": 1.986525058746338, "learning_rate": 3.622034474445665e-05, "loss": 0.5668, "step": 17630 }, { "epoch": 1.766865327790855, "grad_norm": 1.8597817420959473, "learning_rate": 3.620625476521803e-05, "loss": 0.5563, "step": 17640 }, { "epoch": 1.7678669805178546, "grad_norm": 1.821752905845642, "learning_rate": 3.6192160329901484e-05, "loss": 0.5131, "step": 17650 }, { "epoch": 1.768868633244854, "grad_norm": 2.221463918685913, "learning_rate": 3.617806144411156e-05, "loss": 0.545, "step": 17660 }, { "epoch": 1.7698702859718536, "grad_norm": 2.033578395843506, "learning_rate": 3.6163958113454574e-05, "loss": 0.5321, "step": 17670 }, { "epoch": 1.770871938698853, "grad_norm": 1.9562311172485352, "learning_rate": 3.61498503435386e-05, "loss": 0.5289, "step": 17680 }, { "epoch": 1.7718735914258525, "grad_norm": 2.811387062072754, "learning_rate": 3.6135738139973466e-05, "loss": 0.5456, "step": 17690 }, { "epoch": 1.7728752441528521, "grad_norm": 1.598136067390442, "learning_rate": 3.6121621508370805e-05, "loss": 0.5147, "step": 17700 }, { "epoch": 1.7738768968798517, "grad_norm": 1.8884917497634888, "learning_rate": 3.610750045434396e-05, "loss": 0.5152, "step": 17710 }, { "epoch": 1.7748785496068513, "grad_norm": 1.971439003944397, "learning_rate": 3.609337498350805e-05, "loss": 0.5127, "step": 17720 }, { "epoch": 1.775880202333851, "grad_norm": 2.07023286819458, "learning_rate": 3.607924510147998e-05, "loss": 0.5321, "step": 17730 }, { "epoch": 1.7768818550608505, "grad_norm": 2.4199936389923096, "learning_rate": 3.6065110813878365e-05, "loss": 0.5243, "step": 17740 }, { "epoch": 1.77788350778785, "grad_norm": 2.1403119564056396, "learning_rate": 3.6050972126323615e-05, "loss": 0.553, "step": 17750 }, { "epoch": 1.7788851605148495, "grad_norm": 3.023754596710205, "learning_rate": 3.6036829044437835e-05, "loss": 0.5912, "step": 17760 }, { "epoch": 1.779886813241849, "grad_norm": 1.9545767307281494, "learning_rate": 3.602268157384493e-05, "loss": 0.6121, "step": 17770 }, { "epoch": 1.7808884659688486, "grad_norm": 1.6822097301483154, "learning_rate": 3.6008529720170524e-05, "loss": 0.5864, "step": 17780 }, { "epoch": 1.781890118695848, "grad_norm": 2.0851354598999023, "learning_rate": 3.5994373489041995e-05, "loss": 0.5074, "step": 17790 }, { "epoch": 1.7828917714228476, "grad_norm": 1.7990679740905762, "learning_rate": 3.598021288608845e-05, "loss": 0.4877, "step": 17800 }, { "epoch": 1.7838934241498472, "grad_norm": 2.6405606269836426, "learning_rate": 3.5966047916940734e-05, "loss": 0.5953, "step": 17810 }, { "epoch": 1.7848950768768468, "grad_norm": 2.476152181625366, "learning_rate": 3.595187858723144e-05, "loss": 0.532, "step": 17820 }, { "epoch": 1.7858967296038464, "grad_norm": 2.792933940887451, "learning_rate": 3.593770490259489e-05, "loss": 0.5781, "step": 17830 }, { "epoch": 1.786898382330846, "grad_norm": 2.29030179977417, "learning_rate": 3.592352686866713e-05, "loss": 0.5617, "step": 17840 }, { "epoch": 1.7879000350578456, "grad_norm": 2.300529956817627, "learning_rate": 3.590934449108596e-05, "loss": 0.5806, "step": 17850 }, { "epoch": 1.7889016877848452, "grad_norm": 2.4189414978027344, "learning_rate": 3.589515777549087e-05, "loss": 0.5626, "step": 17860 }, { "epoch": 1.7899033405118445, "grad_norm": 1.9713430404663086, "learning_rate": 3.588096672752309e-05, "loss": 0.5583, "step": 17870 }, { "epoch": 1.7909049932388441, "grad_norm": 2.270739793777466, "learning_rate": 3.5866771352825605e-05, "loss": 0.5607, "step": 17880 }, { "epoch": 1.7919066459658435, "grad_norm": 2.855412006378174, "learning_rate": 3.5852571657043075e-05, "loss": 0.5981, "step": 17890 }, { "epoch": 1.792908298692843, "grad_norm": 2.324159622192383, "learning_rate": 3.583836764582189e-05, "loss": 0.4861, "step": 17900 }, { "epoch": 1.7939099514198427, "grad_norm": 2.3632209300994873, "learning_rate": 3.5824159324810175e-05, "loss": 0.5487, "step": 17910 }, { "epoch": 1.7949116041468423, "grad_norm": 2.1716160774230957, "learning_rate": 3.580994669965774e-05, "loss": 0.4606, "step": 17920 }, { "epoch": 1.7959132568738418, "grad_norm": 2.514984607696533, "learning_rate": 3.579572977601615e-05, "loss": 0.4827, "step": 17930 }, { "epoch": 1.7969149096008414, "grad_norm": 2.0306577682495117, "learning_rate": 3.578150855953861e-05, "loss": 0.6076, "step": 17940 }, { "epoch": 1.797916562327841, "grad_norm": 2.062478542327881, "learning_rate": 3.576728305588012e-05, "loss": 0.558, "step": 17950 }, { "epoch": 1.7989182150548406, "grad_norm": 2.077491283416748, "learning_rate": 3.57530532706973e-05, "loss": 0.5423, "step": 17960 }, { "epoch": 1.79991986778184, "grad_norm": 1.7659790515899658, "learning_rate": 3.573881920964853e-05, "loss": 0.5647, "step": 17970 }, { "epoch": 1.8009215205088396, "grad_norm": 2.1615078449249268, "learning_rate": 3.5724580878393867e-05, "loss": 0.5863, "step": 17980 }, { "epoch": 1.8019231732358392, "grad_norm": 1.5051380395889282, "learning_rate": 3.571033828259507e-05, "loss": 0.5236, "step": 17990 }, { "epoch": 1.8029248259628385, "grad_norm": 2.394617795944214, "learning_rate": 3.569609142791559e-05, "loss": 0.5616, "step": 18000 }, { "epoch": 1.8039264786898381, "grad_norm": 2.699385643005371, "learning_rate": 3.5681840320020585e-05, "loss": 0.6394, "step": 18010 }, { "epoch": 1.8049281314168377, "grad_norm": 2.4347870349884033, "learning_rate": 3.566758496457688e-05, "loss": 0.5286, "step": 18020 }, { "epoch": 1.8059297841438373, "grad_norm": 1.4760379791259766, "learning_rate": 3.5653325367253016e-05, "loss": 0.5071, "step": 18030 }, { "epoch": 1.806931436870837, "grad_norm": 2.0530753135681152, "learning_rate": 3.56390615337192e-05, "loss": 0.5589, "step": 18040 }, { "epoch": 1.8079330895978365, "grad_norm": 2.031733751296997, "learning_rate": 3.5624793469647344e-05, "loss": 0.4645, "step": 18050 }, { "epoch": 1.808934742324836, "grad_norm": 2.617835283279419, "learning_rate": 3.5610521180711015e-05, "loss": 0.6479, "step": 18060 }, { "epoch": 1.8099363950518357, "grad_norm": 2.419201374053955, "learning_rate": 3.559624467258548e-05, "loss": 0.4964, "step": 18070 }, { "epoch": 1.810938047778835, "grad_norm": 1.7701033353805542, "learning_rate": 3.5581963950947686e-05, "loss": 0.525, "step": 18080 }, { "epoch": 1.8119397005058346, "grad_norm": 2.738837480545044, "learning_rate": 3.556767902147623e-05, "loss": 0.5093, "step": 18090 }, { "epoch": 1.8129413532328342, "grad_norm": 1.944037914276123, "learning_rate": 3.5553389889851426e-05, "loss": 0.5076, "step": 18100 }, { "epoch": 1.8139430059598336, "grad_norm": 2.223052978515625, "learning_rate": 3.553909656175522e-05, "loss": 0.5436, "step": 18110 }, { "epoch": 1.8149446586868332, "grad_norm": 2.251912832260132, "learning_rate": 3.552479904287123e-05, "loss": 0.5097, "step": 18120 }, { "epoch": 1.8159463114138328, "grad_norm": 2.422960042953491, "learning_rate": 3.5510497338884774e-05, "loss": 0.5101, "step": 18130 }, { "epoch": 1.8169479641408324, "grad_norm": 2.5609405040740967, "learning_rate": 3.549619145548279e-05, "loss": 0.542, "step": 18140 }, { "epoch": 1.817949616867832, "grad_norm": 2.37107515335083, "learning_rate": 3.54818813983539e-05, "loss": 0.5574, "step": 18150 }, { "epoch": 1.8189512695948316, "grad_norm": 2.355593681335449, "learning_rate": 3.54675671731884e-05, "loss": 0.5213, "step": 18160 }, { "epoch": 1.8199529223218311, "grad_norm": 2.0474841594696045, "learning_rate": 3.545324878567821e-05, "loss": 0.5093, "step": 18170 }, { "epoch": 1.8209545750488307, "grad_norm": 2.3665642738342285, "learning_rate": 3.543892624151693e-05, "loss": 0.5607, "step": 18180 }, { "epoch": 1.82195622777583, "grad_norm": 2.511904239654541, "learning_rate": 3.542459954639981e-05, "loss": 0.5602, "step": 18190 }, { "epoch": 1.8229578805028297, "grad_norm": 2.2814455032348633, "learning_rate": 3.541026870602375e-05, "loss": 0.5188, "step": 18200 }, { "epoch": 1.823959533229829, "grad_norm": 2.252317428588867, "learning_rate": 3.539593372608727e-05, "loss": 0.4779, "step": 18210 }, { "epoch": 1.8249611859568287, "grad_norm": 1.9406018257141113, "learning_rate": 3.538159461229059e-05, "loss": 0.6143, "step": 18220 }, { "epoch": 1.8259628386838282, "grad_norm": 2.396653175354004, "learning_rate": 3.5367251370335526e-05, "loss": 0.5132, "step": 18230 }, { "epoch": 1.8269644914108278, "grad_norm": 2.2335658073425293, "learning_rate": 3.535290400592556e-05, "loss": 0.5352, "step": 18240 }, { "epoch": 1.8279661441378274, "grad_norm": 2.568760395050049, "learning_rate": 3.53385525247658e-05, "loss": 0.5214, "step": 18250 }, { "epoch": 1.828967796864827, "grad_norm": 1.8453973531723022, "learning_rate": 3.532419693256301e-05, "loss": 0.5491, "step": 18260 }, { "epoch": 1.8299694495918266, "grad_norm": 2.2260847091674805, "learning_rate": 3.5309837235025574e-05, "loss": 0.5602, "step": 18270 }, { "epoch": 1.8309711023188262, "grad_norm": 1.965449333190918, "learning_rate": 3.5295473437863505e-05, "loss": 0.554, "step": 18280 }, { "epoch": 1.8319727550458256, "grad_norm": 2.2575039863586426, "learning_rate": 3.528110554678846e-05, "loss": 0.597, "step": 18290 }, { "epoch": 1.8329744077728252, "grad_norm": 1.884130835533142, "learning_rate": 3.526673356751371e-05, "loss": 0.5049, "step": 18300 }, { "epoch": 1.8339760604998248, "grad_norm": 2.2805416584014893, "learning_rate": 3.525235750575416e-05, "loss": 0.5269, "step": 18310 }, { "epoch": 1.8349777132268241, "grad_norm": 2.192514419555664, "learning_rate": 3.523797736722634e-05, "loss": 0.5541, "step": 18320 }, { "epoch": 1.8359793659538237, "grad_norm": 2.4717044830322266, "learning_rate": 3.52235931576484e-05, "loss": 0.5149, "step": 18330 }, { "epoch": 1.8369810186808233, "grad_norm": 2.0693359375, "learning_rate": 3.520920488274009e-05, "loss": 0.5642, "step": 18340 }, { "epoch": 1.837982671407823, "grad_norm": 2.423137903213501, "learning_rate": 3.51948125482228e-05, "loss": 0.5392, "step": 18350 }, { "epoch": 1.8389843241348225, "grad_norm": 2.4728684425354004, "learning_rate": 3.518041615981954e-05, "loss": 0.5382, "step": 18360 }, { "epoch": 1.839985976861822, "grad_norm": 2.3842051029205322, "learning_rate": 3.516601572325491e-05, "loss": 0.5356, "step": 18370 }, { "epoch": 1.8409876295888217, "grad_norm": 1.8995606899261475, "learning_rate": 3.515161124425513e-05, "loss": 0.505, "step": 18380 }, { "epoch": 1.8419892823158213, "grad_norm": 2.8482911586761475, "learning_rate": 3.513720272854802e-05, "loss": 0.5478, "step": 18390 }, { "epoch": 1.8429909350428206, "grad_norm": 1.9546759128570557, "learning_rate": 3.5122790181863017e-05, "loss": 0.4782, "step": 18400 }, { "epoch": 1.8439925877698202, "grad_norm": 2.6709511280059814, "learning_rate": 3.510837360993116e-05, "loss": 0.5357, "step": 18410 }, { "epoch": 1.8449942404968196, "grad_norm": 2.3356592655181885, "learning_rate": 3.5093953018485076e-05, "loss": 0.5344, "step": 18420 }, { "epoch": 1.8459958932238192, "grad_norm": 2.1896042823791504, "learning_rate": 3.507952841325899e-05, "loss": 0.6113, "step": 18430 }, { "epoch": 1.8469975459508188, "grad_norm": 1.9698134660720825, "learning_rate": 3.5065099799988766e-05, "loss": 0.4655, "step": 18440 }, { "epoch": 1.8479991986778184, "grad_norm": 2.549419403076172, "learning_rate": 3.505066718441179e-05, "loss": 0.4673, "step": 18450 }, { "epoch": 1.849000851404818, "grad_norm": 2.0355772972106934, "learning_rate": 3.503623057226709e-05, "loss": 0.5131, "step": 18460 }, { "epoch": 1.8500025041318175, "grad_norm": 2.1255834102630615, "learning_rate": 3.502178996929527e-05, "loss": 0.5656, "step": 18470 }, { "epoch": 1.8510041568588171, "grad_norm": 2.1015443801879883, "learning_rate": 3.500734538123852e-05, "loss": 0.5569, "step": 18480 }, { "epoch": 1.8520058095858167, "grad_norm": 2.9126319885253906, "learning_rate": 3.4992896813840624e-05, "loss": 0.5279, "step": 18490 }, { "epoch": 1.853007462312816, "grad_norm": 2.6113879680633545, "learning_rate": 3.497844427284693e-05, "loss": 0.6152, "step": 18500 }, { "epoch": 1.8540091150398157, "grad_norm": 2.1508989334106445, "learning_rate": 3.496398776400437e-05, "loss": 0.5262, "step": 18510 }, { "epoch": 1.8550107677668153, "grad_norm": 2.867258071899414, "learning_rate": 3.4949527293061475e-05, "loss": 0.6089, "step": 18520 }, { "epoch": 1.8560124204938147, "grad_norm": 2.2705745697021484, "learning_rate": 3.493506286576832e-05, "loss": 0.5411, "step": 18530 }, { "epoch": 1.8570140732208142, "grad_norm": 2.0853490829467773, "learning_rate": 3.492059448787659e-05, "loss": 0.5184, "step": 18540 }, { "epoch": 1.8580157259478138, "grad_norm": 2.6328513622283936, "learning_rate": 3.4906122165139496e-05, "loss": 0.5298, "step": 18550 }, { "epoch": 1.8590173786748134, "grad_norm": 2.7920444011688232, "learning_rate": 3.489164590331186e-05, "loss": 0.5228, "step": 18560 }, { "epoch": 1.860019031401813, "grad_norm": 1.7941895723342896, "learning_rate": 3.487716570815004e-05, "loss": 0.5057, "step": 18570 }, { "epoch": 1.8610206841288126, "grad_norm": 1.9120043516159058, "learning_rate": 3.4862681585411984e-05, "loss": 0.5185, "step": 18580 }, { "epoch": 1.8620223368558122, "grad_norm": 1.4897717237472534, "learning_rate": 3.484819354085717e-05, "loss": 0.4982, "step": 18590 }, { "epoch": 1.8630239895828118, "grad_norm": 1.8527815341949463, "learning_rate": 3.483370158024667e-05, "loss": 0.5612, "step": 18600 }, { "epoch": 1.8640256423098112, "grad_norm": 2.3453290462493896, "learning_rate": 3.481920570934308e-05, "loss": 0.4998, "step": 18610 }, { "epoch": 1.8650272950368108, "grad_norm": 2.2166287899017334, "learning_rate": 3.48047059339106e-05, "loss": 0.5478, "step": 18620 }, { "epoch": 1.8660289477638103, "grad_norm": 1.6502530574798584, "learning_rate": 3.479020225971491e-05, "loss": 0.535, "step": 18630 }, { "epoch": 1.8670306004908097, "grad_norm": 1.860781192779541, "learning_rate": 3.4775694692523306e-05, "loss": 0.4861, "step": 18640 }, { "epoch": 1.8680322532178093, "grad_norm": 2.397423505783081, "learning_rate": 3.476118323810459e-05, "loss": 0.5373, "step": 18650 }, { "epoch": 1.869033905944809, "grad_norm": 2.4832894802093506, "learning_rate": 3.474666790222914e-05, "loss": 0.5306, "step": 18660 }, { "epoch": 1.8700355586718085, "grad_norm": 2.5092248916625977, "learning_rate": 3.4732148690668866e-05, "loss": 0.5647, "step": 18670 }, { "epoch": 1.871037211398808, "grad_norm": 2.138489007949829, "learning_rate": 3.471762560919719e-05, "loss": 0.5775, "step": 18680 }, { "epoch": 1.8720388641258077, "grad_norm": 2.200510263442993, "learning_rate": 3.470309866358914e-05, "loss": 0.5667, "step": 18690 }, { "epoch": 1.8730405168528073, "grad_norm": 2.08661150932312, "learning_rate": 3.46885678596212e-05, "loss": 0.5655, "step": 18700 }, { "epoch": 1.8740421695798066, "grad_norm": 2.4580740928649902, "learning_rate": 3.4674033203071464e-05, "loss": 0.522, "step": 18710 }, { "epoch": 1.8750438223068062, "grad_norm": 2.8262064456939697, "learning_rate": 3.46594946997195e-05, "loss": 0.5338, "step": 18720 }, { "epoch": 1.8760454750338058, "grad_norm": 2.0510544776916504, "learning_rate": 3.4644952355346435e-05, "loss": 0.588, "step": 18730 }, { "epoch": 1.8770471277608052, "grad_norm": 2.456479549407959, "learning_rate": 3.463040617573491e-05, "loss": 0.6353, "step": 18740 }, { "epoch": 1.8780487804878048, "grad_norm": 2.268458127975464, "learning_rate": 3.461585616666911e-05, "loss": 0.5018, "step": 18750 }, { "epoch": 1.8790504332148044, "grad_norm": 2.170693874359131, "learning_rate": 3.460130233393472e-05, "loss": 0.5805, "step": 18760 }, { "epoch": 1.880052085941804, "grad_norm": 2.764228582382202, "learning_rate": 3.458674468331896e-05, "loss": 0.5206, "step": 18770 }, { "epoch": 1.8810537386688035, "grad_norm": 1.9646960496902466, "learning_rate": 3.457218322061056e-05, "loss": 0.4927, "step": 18780 }, { "epoch": 1.8820553913958031, "grad_norm": 2.303696870803833, "learning_rate": 3.455761795159978e-05, "loss": 0.5437, "step": 18790 }, { "epoch": 1.8830570441228027, "grad_norm": 2.2272274494171143, "learning_rate": 3.454304888207837e-05, "loss": 0.6098, "step": 18800 }, { "epoch": 1.8840586968498023, "grad_norm": 2.036489248275757, "learning_rate": 3.452847601783959e-05, "loss": 0.5595, "step": 18810 }, { "epoch": 1.8850603495768017, "grad_norm": 2.238173007965088, "learning_rate": 3.451389936467827e-05, "loss": 0.5055, "step": 18820 }, { "epoch": 1.8860620023038013, "grad_norm": 2.1000025272369385, "learning_rate": 3.4499318928390665e-05, "loss": 0.5292, "step": 18830 }, { "epoch": 1.8870636550308009, "grad_norm": 2.115056037902832, "learning_rate": 3.448473471477457e-05, "loss": 0.5193, "step": 18840 }, { "epoch": 1.8880653077578002, "grad_norm": 2.3937647342681885, "learning_rate": 3.44701467296293e-05, "loss": 0.5607, "step": 18850 }, { "epoch": 1.8890669604847998, "grad_norm": 2.403609037399292, "learning_rate": 3.4455554978755634e-05, "loss": 0.4612, "step": 18860 }, { "epoch": 1.8900686132117994, "grad_norm": 2.042008399963379, "learning_rate": 3.444095946795587e-05, "loss": 0.5366, "step": 18870 }, { "epoch": 1.891070265938799, "grad_norm": 2.9627082347869873, "learning_rate": 3.44263602030338e-05, "loss": 0.5807, "step": 18880 }, { "epoch": 1.8920719186657986, "grad_norm": 1.7297526597976685, "learning_rate": 3.4411757189794703e-05, "loss": 0.4538, "step": 18890 }, { "epoch": 1.8930735713927982, "grad_norm": 2.134171724319458, "learning_rate": 3.439715043404535e-05, "loss": 0.5695, "step": 18900 }, { "epoch": 1.8940752241197978, "grad_norm": 3.0094010829925537, "learning_rate": 3.4382539941594e-05, "loss": 0.4792, "step": 18910 }, { "epoch": 1.8950768768467974, "grad_norm": 1.9440160989761353, "learning_rate": 3.4367925718250405e-05, "loss": 0.5362, "step": 18920 }, { "epoch": 1.8960785295737967, "grad_norm": 2.718689203262329, "learning_rate": 3.4353307769825794e-05, "loss": 0.4992, "step": 18930 }, { "epoch": 1.8970801823007963, "grad_norm": 2.141523599624634, "learning_rate": 3.433868610213286e-05, "loss": 0.5421, "step": 18940 }, { "epoch": 1.8980818350277957, "grad_norm": 2.2745018005371094, "learning_rate": 3.4324060720985815e-05, "loss": 0.5296, "step": 18950 }, { "epoch": 1.8990834877547953, "grad_norm": 2.313880443572998, "learning_rate": 3.4309431632200325e-05, "loss": 0.5812, "step": 18960 }, { "epoch": 1.9000851404817949, "grad_norm": 2.571535110473633, "learning_rate": 3.429479884159351e-05, "loss": 0.5244, "step": 18970 }, { "epoch": 1.9010867932087945, "grad_norm": 2.148982286453247, "learning_rate": 3.4280162354984e-05, "loss": 0.4673, "step": 18980 }, { "epoch": 1.902088445935794, "grad_norm": 3.1783385276794434, "learning_rate": 3.426552217819187e-05, "loss": 0.4956, "step": 18990 }, { "epoch": 1.9030900986627937, "grad_norm": 2.790367841720581, "learning_rate": 3.425087831703868e-05, "loss": 0.5036, "step": 19000 }, { "epoch": 1.9040917513897933, "grad_norm": 2.6144115924835205, "learning_rate": 3.423623077734743e-05, "loss": 0.5183, "step": 19010 }, { "epoch": 1.9050934041167928, "grad_norm": 1.9311896562576294, "learning_rate": 3.4221579564942604e-05, "loss": 0.5641, "step": 19020 }, { "epoch": 1.9060950568437922, "grad_norm": 2.1071672439575195, "learning_rate": 3.4206924685650143e-05, "loss": 0.5145, "step": 19030 }, { "epoch": 1.9070967095707918, "grad_norm": 2.4376401901245117, "learning_rate": 3.419226614529744e-05, "loss": 0.4721, "step": 19040 }, { "epoch": 1.9080983622977914, "grad_norm": 2.126660108566284, "learning_rate": 3.417760394971335e-05, "loss": 0.4731, "step": 19050 }, { "epoch": 1.9091000150247908, "grad_norm": 2.3037705421447754, "learning_rate": 3.4162938104728165e-05, "loss": 0.5206, "step": 19060 }, { "epoch": 1.9101016677517904, "grad_norm": 2.035919427871704, "learning_rate": 3.4148268616173655e-05, "loss": 0.564, "step": 19070 }, { "epoch": 1.91110332047879, "grad_norm": 2.29209566116333, "learning_rate": 3.413359548988303e-05, "loss": 0.4901, "step": 19080 }, { "epoch": 1.9121049732057895, "grad_norm": 3.5236032009124756, "learning_rate": 3.4118918731690925e-05, "loss": 0.564, "step": 19090 }, { "epoch": 1.9131066259327891, "grad_norm": 3.52905011177063, "learning_rate": 3.410423834743345e-05, "loss": 0.6002, "step": 19100 }, { "epoch": 1.9141082786597887, "grad_norm": 2.6410086154937744, "learning_rate": 3.408955434294813e-05, "loss": 0.5538, "step": 19110 }, { "epoch": 1.9151099313867883, "grad_norm": 2.0407655239105225, "learning_rate": 3.407486672407395e-05, "loss": 0.5388, "step": 19120 }, { "epoch": 1.916111584113788, "grad_norm": 2.2420051097869873, "learning_rate": 3.406017549665134e-05, "loss": 0.567, "step": 19130 }, { "epoch": 1.9171132368407873, "grad_norm": 2.550873041152954, "learning_rate": 3.404548066652211e-05, "loss": 0.5538, "step": 19140 }, { "epoch": 1.9181148895677869, "grad_norm": 2.334690570831299, "learning_rate": 3.403078223952959e-05, "loss": 0.5259, "step": 19150 }, { "epoch": 1.9191165422947862, "grad_norm": 1.7644654512405396, "learning_rate": 3.4016080221518455e-05, "loss": 0.5255, "step": 19160 }, { "epoch": 1.9201181950217858, "grad_norm": 2.645530939102173, "learning_rate": 3.4001374618334856e-05, "loss": 0.5159, "step": 19170 }, { "epoch": 1.9211198477487854, "grad_norm": 2.168233871459961, "learning_rate": 3.398666543582637e-05, "loss": 0.5738, "step": 19180 }, { "epoch": 1.922121500475785, "grad_norm": 2.2151002883911133, "learning_rate": 3.397195267984197e-05, "loss": 0.558, "step": 19190 }, { "epoch": 1.9231231532027846, "grad_norm": 1.8769047260284424, "learning_rate": 3.395723635623208e-05, "loss": 0.5495, "step": 19200 }, { "epoch": 1.9241248059297842, "grad_norm": 2.8317298889160156, "learning_rate": 3.394251647084852e-05, "loss": 0.5709, "step": 19210 }, { "epoch": 1.9251264586567838, "grad_norm": 2.330803871154785, "learning_rate": 3.392779302954454e-05, "loss": 0.5585, "step": 19220 }, { "epoch": 1.9261281113837834, "grad_norm": 2.302245616912842, "learning_rate": 3.39130660381748e-05, "loss": 0.5349, "step": 19230 }, { "epoch": 1.9271297641107827, "grad_norm": 2.2176613807678223, "learning_rate": 3.389833550259536e-05, "loss": 0.47, "step": 19240 }, { "epoch": 1.9281314168377823, "grad_norm": 1.9405122995376587, "learning_rate": 3.388360142866371e-05, "loss": 0.5174, "step": 19250 }, { "epoch": 1.929133069564782, "grad_norm": 2.862607479095459, "learning_rate": 3.386886382223874e-05, "loss": 0.5595, "step": 19260 }, { "epoch": 1.9301347222917813, "grad_norm": 1.8453691005706787, "learning_rate": 3.385412268918073e-05, "loss": 0.5051, "step": 19270 }, { "epoch": 1.9311363750187809, "grad_norm": 2.0955848693847656, "learning_rate": 3.383937803535139e-05, "loss": 0.5681, "step": 19280 }, { "epoch": 1.9321380277457805, "grad_norm": 2.8637776374816895, "learning_rate": 3.3824629866613795e-05, "loss": 0.5354, "step": 19290 }, { "epoch": 1.93313968047278, "grad_norm": 1.8948688507080078, "learning_rate": 3.380987818883245e-05, "loss": 0.5965, "step": 19300 }, { "epoch": 1.9341413331997797, "grad_norm": 2.108222246170044, "learning_rate": 3.379512300787324e-05, "loss": 0.5391, "step": 19310 }, { "epoch": 1.9351429859267792, "grad_norm": 2.2994465827941895, "learning_rate": 3.3780364329603445e-05, "loss": 0.532, "step": 19320 }, { "epoch": 1.9361446386537788, "grad_norm": 2.433121681213379, "learning_rate": 3.376560215989174e-05, "loss": 0.5057, "step": 19330 }, { "epoch": 1.9371462913807784, "grad_norm": 2.2182536125183105, "learning_rate": 3.3750836504608176e-05, "loss": 0.5601, "step": 19340 }, { "epoch": 1.9381479441077778, "grad_norm": 3.7288570404052734, "learning_rate": 3.373606736962419e-05, "loss": 0.587, "step": 19350 }, { "epoch": 1.9391495968347774, "grad_norm": 2.0662009716033936, "learning_rate": 3.372129476081264e-05, "loss": 0.5102, "step": 19360 }, { "epoch": 1.940151249561777, "grad_norm": 2.209134817123413, "learning_rate": 3.37065186840477e-05, "loss": 0.5374, "step": 19370 }, { "epoch": 1.9411529022887763, "grad_norm": 1.951745867729187, "learning_rate": 3.369173914520499e-05, "loss": 0.5269, "step": 19380 }, { "epoch": 1.942154555015776, "grad_norm": 2.849954605102539, "learning_rate": 3.367695615016146e-05, "loss": 0.5075, "step": 19390 }, { "epoch": 1.9431562077427755, "grad_norm": 2.189919948577881, "learning_rate": 3.3662169704795454e-05, "loss": 0.5444, "step": 19400 }, { "epoch": 1.9441578604697751, "grad_norm": 2.3285562992095947, "learning_rate": 3.364737981498668e-05, "loss": 0.527, "step": 19410 }, { "epoch": 1.9451595131967747, "grad_norm": 2.5581185817718506, "learning_rate": 3.363258648661623e-05, "loss": 0.5201, "step": 19420 }, { "epoch": 1.9461611659237743, "grad_norm": 2.3942644596099854, "learning_rate": 3.361778972556655e-05, "loss": 0.5137, "step": 19430 }, { "epoch": 1.947162818650774, "grad_norm": 2.7398712635040283, "learning_rate": 3.360298953772144e-05, "loss": 0.597, "step": 19440 }, { "epoch": 1.9481644713777735, "grad_norm": 2.240510940551758, "learning_rate": 3.358818592896609e-05, "loss": 0.5167, "step": 19450 }, { "epoch": 1.9491661241047729, "grad_norm": 2.2413768768310547, "learning_rate": 3.357337890518704e-05, "loss": 0.4973, "step": 19460 }, { "epoch": 1.9501677768317724, "grad_norm": 2.2250759601593018, "learning_rate": 3.355856847227217e-05, "loss": 0.5633, "step": 19470 }, { "epoch": 1.9511694295587718, "grad_norm": 2.3619704246520996, "learning_rate": 3.3543754636110755e-05, "loss": 0.5079, "step": 19480 }, { "epoch": 1.9521710822857714, "grad_norm": 1.8453508615493774, "learning_rate": 3.3528937402593375e-05, "loss": 0.535, "step": 19490 }, { "epoch": 1.953172735012771, "grad_norm": 1.8270654678344727, "learning_rate": 3.351411677761199e-05, "loss": 0.4869, "step": 19500 }, { "epoch": 1.9541743877397706, "grad_norm": 1.9440797567367554, "learning_rate": 3.349929276705992e-05, "loss": 0.5365, "step": 19510 }, { "epoch": 1.9551760404667702, "grad_norm": 2.2624425888061523, "learning_rate": 3.3484465376831784e-05, "loss": 0.5449, "step": 19520 }, { "epoch": 1.9561776931937698, "grad_norm": 2.4637839794158936, "learning_rate": 3.3469634612823616e-05, "loss": 0.5565, "step": 19530 }, { "epoch": 1.9571793459207694, "grad_norm": 2.3287465572357178, "learning_rate": 3.345480048093272e-05, "loss": 0.5509, "step": 19540 }, { "epoch": 1.958180998647769, "grad_norm": 1.6043740510940552, "learning_rate": 3.3439962987057774e-05, "loss": 0.5079, "step": 19550 }, { "epoch": 1.9591826513747683, "grad_norm": 2.930209159851074, "learning_rate": 3.3425122137098794e-05, "loss": 0.5571, "step": 19560 }, { "epoch": 1.960184304101768, "grad_norm": 2.760525941848755, "learning_rate": 3.341027793695713e-05, "loss": 0.5042, "step": 19570 }, { "epoch": 1.9611859568287675, "grad_norm": 2.3335533142089844, "learning_rate": 3.3395430392535455e-05, "loss": 0.549, "step": 19580 }, { "epoch": 1.9621876095557669, "grad_norm": 2.1368236541748047, "learning_rate": 3.338057950973778e-05, "loss": 0.4718, "step": 19590 }, { "epoch": 1.9631892622827665, "grad_norm": 1.9596712589263916, "learning_rate": 3.336572529446944e-05, "loss": 0.5775, "step": 19600 }, { "epoch": 1.964190915009766, "grad_norm": 2.1493420600891113, "learning_rate": 3.335086775263709e-05, "loss": 0.5564, "step": 19610 }, { "epoch": 1.9651925677367656, "grad_norm": 2.2132809162139893, "learning_rate": 3.333600689014872e-05, "loss": 0.5046, "step": 19620 }, { "epoch": 1.9661942204637652, "grad_norm": 2.2998745441436768, "learning_rate": 3.3321142712913625e-05, "loss": 0.4809, "step": 19630 }, { "epoch": 1.9671958731907648, "grad_norm": 1.8581064939498901, "learning_rate": 3.330627522684244e-05, "loss": 0.5627, "step": 19640 }, { "epoch": 1.9681975259177644, "grad_norm": 2.277087926864624, "learning_rate": 3.329140443784709e-05, "loss": 0.5622, "step": 19650 }, { "epoch": 1.969199178644764, "grad_norm": 2.815183162689209, "learning_rate": 3.3276530351840825e-05, "loss": 0.4914, "step": 19660 }, { "epoch": 1.9702008313717634, "grad_norm": 3.2514102458953857, "learning_rate": 3.326165297473821e-05, "loss": 0.5576, "step": 19670 }, { "epoch": 1.971202484098763, "grad_norm": 1.6029939651489258, "learning_rate": 3.324677231245512e-05, "loss": 0.5235, "step": 19680 }, { "epoch": 1.9722041368257623, "grad_norm": 1.8514877557754517, "learning_rate": 3.323188837090874e-05, "loss": 0.5727, "step": 19690 }, { "epoch": 1.973205789552762, "grad_norm": 2.3635377883911133, "learning_rate": 3.3217001156017526e-05, "loss": 0.4909, "step": 19700 }, { "epoch": 1.9742074422797615, "grad_norm": 2.3811750411987305, "learning_rate": 3.320211067370128e-05, "loss": 0.5762, "step": 19710 }, { "epoch": 1.9752090950067611, "grad_norm": 2.026413679122925, "learning_rate": 3.318721692988108e-05, "loss": 0.4796, "step": 19720 }, { "epoch": 1.9762107477337607, "grad_norm": 2.4324848651885986, "learning_rate": 3.317231993047929e-05, "loss": 0.4812, "step": 19730 }, { "epoch": 1.9772124004607603, "grad_norm": 3.1195809841156006, "learning_rate": 3.3157419681419616e-05, "loss": 0.5272, "step": 19740 }, { "epoch": 1.97821405318776, "grad_norm": 1.83283269405365, "learning_rate": 3.314251618862699e-05, "loss": 0.5282, "step": 19750 }, { "epoch": 1.9792157059147595, "grad_norm": 1.9941926002502441, "learning_rate": 3.3127609458027675e-05, "loss": 0.4591, "step": 19760 }, { "epoch": 1.9802173586417589, "grad_norm": 2.339540958404541, "learning_rate": 3.311269949554923e-05, "loss": 0.4986, "step": 19770 }, { "epoch": 1.9812190113687584, "grad_norm": 1.8773863315582275, "learning_rate": 3.309778630712047e-05, "loss": 0.6094, "step": 19780 }, { "epoch": 1.982220664095758, "grad_norm": 2.326773166656494, "learning_rate": 3.30828698986715e-05, "loss": 0.4437, "step": 19790 }, { "epoch": 1.9832223168227574, "grad_norm": 2.306333541870117, "learning_rate": 3.3067950276133716e-05, "loss": 0.4891, "step": 19800 }, { "epoch": 1.984223969549757, "grad_norm": 2.0173776149749756, "learning_rate": 3.30530274454398e-05, "loss": 0.5067, "step": 19810 }, { "epoch": 1.9852256222767566, "grad_norm": 2.5315053462982178, "learning_rate": 3.303810141252368e-05, "loss": 0.4984, "step": 19820 }, { "epoch": 1.9862272750037562, "grad_norm": 2.567328453063965, "learning_rate": 3.302317218332058e-05, "loss": 0.6134, "step": 19830 }, { "epoch": 1.9872289277307558, "grad_norm": 2.731470823287964, "learning_rate": 3.300823976376699e-05, "loss": 0.4655, "step": 19840 }, { "epoch": 1.9882305804577554, "grad_norm": 1.868723750114441, "learning_rate": 3.2993304159800666e-05, "loss": 0.5736, "step": 19850 }, { "epoch": 1.989232233184755, "grad_norm": 2.778196334838867, "learning_rate": 3.2978365377360625e-05, "loss": 0.5701, "step": 19860 }, { "epoch": 1.9902338859117545, "grad_norm": 2.3369123935699463, "learning_rate": 3.2963423422387175e-05, "loss": 0.5677, "step": 19870 }, { "epoch": 1.991235538638754, "grad_norm": 2.2308642864227295, "learning_rate": 3.294847830082184e-05, "loss": 0.5402, "step": 19880 }, { "epoch": 1.9922371913657535, "grad_norm": 3.2330994606018066, "learning_rate": 3.293353001860745e-05, "loss": 0.5364, "step": 19890 }, { "epoch": 1.993238844092753, "grad_norm": 2.73323392868042, "learning_rate": 3.291857858168805e-05, "loss": 0.5763, "step": 19900 }, { "epoch": 1.9942404968197525, "grad_norm": 1.7553918361663818, "learning_rate": 3.2903623996008984e-05, "loss": 0.5238, "step": 19910 }, { "epoch": 1.995242149546752, "grad_norm": 1.860754370689392, "learning_rate": 3.2888666267516806e-05, "loss": 0.5092, "step": 19920 }, { "epoch": 1.9962438022737516, "grad_norm": 2.701542377471924, "learning_rate": 3.287370540215934e-05, "loss": 0.4812, "step": 19930 }, { "epoch": 1.9972454550007512, "grad_norm": 2.274592638015747, "learning_rate": 3.285874140588566e-05, "loss": 0.5499, "step": 19940 }, { "epoch": 1.9982471077277508, "grad_norm": 2.010769844055176, "learning_rate": 3.2843774284646074e-05, "loss": 0.4987, "step": 19950 }, { "epoch": 1.9992487604547504, "grad_norm": 1.8435958623886108, "learning_rate": 3.282880404439214e-05, "loss": 0.5644, "step": 19960 }, { "epoch": 2.0002003305454, "grad_norm": 2.158094644546509, "learning_rate": 3.281383069107666e-05, "loss": 0.5283, "step": 19970 }, { "epoch": 2.0012019832723995, "grad_norm": 1.6558252573013306, "learning_rate": 3.279885423065367e-05, "loss": 0.4891, "step": 19980 }, { "epoch": 2.002203635999399, "grad_norm": 2.214022159576416, "learning_rate": 3.278387466907843e-05, "loss": 0.5128, "step": 19990 }, { "epoch": 2.0032052887263987, "grad_norm": 2.0052435398101807, "learning_rate": 3.276889201230745e-05, "loss": 0.4326, "step": 20000 }, { "epoch": 2.0032052887263987, "eval_bleu": 0.2002503205345812, "eval_loss": 0.5508657097816467, "eval_rouge1": 0.6375570480064021, "eval_rouge2": 0.4898024330131999, "eval_rougeL": 0.6005766639745327, "eval_runtime": 89842.9594, "eval_samples_per_second": 0.198, "eval_steps_per_second": 0.025, "eval_wer": 1.7028008589955927, "step": 20000 }, { "epoch": 2.0042069414533983, "grad_norm": 2.0601089000701904, "learning_rate": 3.275390626629846e-05, "loss": 0.453, "step": 20010 }, { "epoch": 2.005208594180398, "grad_norm": 2.3491313457489014, "learning_rate": 3.273891743701043e-05, "loss": 0.4925, "step": 20020 }, { "epoch": 2.006210246907397, "grad_norm": 2.14555025100708, "learning_rate": 3.272392553040354e-05, "loss": 0.5125, "step": 20030 }, { "epoch": 2.0072118996343966, "grad_norm": 2.575279712677002, "learning_rate": 3.270893055243921e-05, "loss": 0.5094, "step": 20040 }, { "epoch": 2.008213552361396, "grad_norm": 2.2042462825775146, "learning_rate": 3.2693932509080066e-05, "loss": 0.4826, "step": 20050 }, { "epoch": 2.009215205088396, "grad_norm": 1.9596309661865234, "learning_rate": 3.267893140628996e-05, "loss": 0.5047, "step": 20060 }, { "epoch": 2.0102168578153954, "grad_norm": 1.733544111251831, "learning_rate": 3.266392725003398e-05, "loss": 0.539, "step": 20070 }, { "epoch": 2.011218510542395, "grad_norm": 2.1472814083099365, "learning_rate": 3.264892004627838e-05, "loss": 0.5984, "step": 20080 }, { "epoch": 2.0122201632693946, "grad_norm": 2.2403430938720703, "learning_rate": 3.2633909800990666e-05, "loss": 0.5175, "step": 20090 }, { "epoch": 2.013221815996394, "grad_norm": 1.9223722219467163, "learning_rate": 3.261889652013955e-05, "loss": 0.4314, "step": 20100 }, { "epoch": 2.0142234687233938, "grad_norm": 2.1457340717315674, "learning_rate": 3.2603880209694946e-05, "loss": 0.4796, "step": 20110 }, { "epoch": 2.0152251214503933, "grad_norm": 2.3787331581115723, "learning_rate": 3.258886087562796e-05, "loss": 0.4678, "step": 20120 }, { "epoch": 2.0162267741773925, "grad_norm": 2.1419434547424316, "learning_rate": 3.2573838523910905e-05, "loss": 0.5041, "step": 20130 }, { "epoch": 2.017228426904392, "grad_norm": 2.3345863819122314, "learning_rate": 3.255881316051732e-05, "loss": 0.5159, "step": 20140 }, { "epoch": 2.0182300796313917, "grad_norm": 2.441622734069824, "learning_rate": 3.25437847914219e-05, "loss": 0.4615, "step": 20150 }, { "epoch": 2.0192317323583913, "grad_norm": 1.5690834522247314, "learning_rate": 3.2528753422600564e-05, "loss": 0.4809, "step": 20160 }, { "epoch": 2.020233385085391, "grad_norm": 1.9578380584716797, "learning_rate": 3.251371906003042e-05, "loss": 0.5748, "step": 20170 }, { "epoch": 2.0212350378123904, "grad_norm": 1.7934218645095825, "learning_rate": 3.249868170968977e-05, "loss": 0.4488, "step": 20180 }, { "epoch": 2.02223669053939, "grad_norm": 2.1038122177124023, "learning_rate": 3.248364137755808e-05, "loss": 0.5006, "step": 20190 }, { "epoch": 2.0232383432663896, "grad_norm": 1.5591011047363281, "learning_rate": 3.2468598069616016e-05, "loss": 0.4804, "step": 20200 }, { "epoch": 2.024239995993389, "grad_norm": 2.1266472339630127, "learning_rate": 3.245355179184545e-05, "loss": 0.4946, "step": 20210 }, { "epoch": 2.025241648720389, "grad_norm": 2.56298565864563, "learning_rate": 3.24385025502294e-05, "loss": 0.4698, "step": 20220 }, { "epoch": 2.0262433014473884, "grad_norm": 1.669932246208191, "learning_rate": 3.242345035075208e-05, "loss": 0.4983, "step": 20230 }, { "epoch": 2.0272449541743875, "grad_norm": 2.1388626098632812, "learning_rate": 3.240839519939887e-05, "loss": 0.491, "step": 20240 }, { "epoch": 2.028246606901387, "grad_norm": 2.082549810409546, "learning_rate": 3.239333710215635e-05, "loss": 0.4602, "step": 20250 }, { "epoch": 2.0292482596283867, "grad_norm": 3.176657199859619, "learning_rate": 3.237827606501223e-05, "loss": 0.5682, "step": 20260 }, { "epoch": 2.0302499123553863, "grad_norm": 2.1459405422210693, "learning_rate": 3.236321209395543e-05, "loss": 0.4811, "step": 20270 }, { "epoch": 2.031251565082386, "grad_norm": 2.0369656085968018, "learning_rate": 3.2348145194976e-05, "loss": 0.507, "step": 20280 }, { "epoch": 2.0322532178093855, "grad_norm": 2.6629416942596436, "learning_rate": 3.233307537406519e-05, "loss": 0.4818, "step": 20290 }, { "epoch": 2.033254870536385, "grad_norm": 2.0255343914031982, "learning_rate": 3.231800263721538e-05, "loss": 0.5273, "step": 20300 }, { "epoch": 2.0342565232633847, "grad_norm": 1.8463060855865479, "learning_rate": 3.230292699042014e-05, "loss": 0.4931, "step": 20310 }, { "epoch": 2.0352581759903843, "grad_norm": 2.6563429832458496, "learning_rate": 3.228784843967416e-05, "loss": 0.5148, "step": 20320 }, { "epoch": 2.036259828717384, "grad_norm": 2.2401204109191895, "learning_rate": 3.2272766990973316e-05, "loss": 0.4384, "step": 20330 }, { "epoch": 2.037261481444383, "grad_norm": 2.7477822303771973, "learning_rate": 3.225768265031463e-05, "loss": 0.5228, "step": 20340 }, { "epoch": 2.0382631341713826, "grad_norm": 2.590658187866211, "learning_rate": 3.2242595423696277e-05, "loss": 0.4807, "step": 20350 }, { "epoch": 2.039264786898382, "grad_norm": 2.0068235397338867, "learning_rate": 3.222750531711755e-05, "loss": 0.5156, "step": 20360 }, { "epoch": 2.040266439625382, "grad_norm": 2.5355372428894043, "learning_rate": 3.2212412336578926e-05, "loss": 0.6042, "step": 20370 }, { "epoch": 2.0412680923523814, "grad_norm": 2.3344709873199463, "learning_rate": 3.2197316488081995e-05, "loss": 0.5159, "step": 20380 }, { "epoch": 2.042269745079381, "grad_norm": 2.0762157440185547, "learning_rate": 3.218221777762952e-05, "loss": 0.4592, "step": 20390 }, { "epoch": 2.0432713978063806, "grad_norm": 2.093581199645996, "learning_rate": 3.216711621122537e-05, "loss": 0.4531, "step": 20400 }, { "epoch": 2.04427305053338, "grad_norm": 2.9248385429382324, "learning_rate": 3.215201179487456e-05, "loss": 0.469, "step": 20410 }, { "epoch": 2.0452747032603797, "grad_norm": 1.7484266757965088, "learning_rate": 3.213690453458325e-05, "loss": 0.4237, "step": 20420 }, { "epoch": 2.0462763559873793, "grad_norm": 1.9926973581314087, "learning_rate": 3.212179443635872e-05, "loss": 0.471, "step": 20430 }, { "epoch": 2.047278008714379, "grad_norm": 1.6716692447662354, "learning_rate": 3.210668150620937e-05, "loss": 0.4656, "step": 20440 }, { "epoch": 2.048279661441378, "grad_norm": 1.3834606409072876, "learning_rate": 3.209156575014475e-05, "loss": 0.4969, "step": 20450 }, { "epoch": 2.0492813141683777, "grad_norm": 1.8815782070159912, "learning_rate": 3.207644717417551e-05, "loss": 0.4847, "step": 20460 }, { "epoch": 2.0502829668953773, "grad_norm": 1.73691987991333, "learning_rate": 3.2061325784313436e-05, "loss": 0.4458, "step": 20470 }, { "epoch": 2.051284619622377, "grad_norm": 2.1354963779449463, "learning_rate": 3.204620158657144e-05, "loss": 0.4935, "step": 20480 }, { "epoch": 2.0522862723493764, "grad_norm": 1.9221725463867188, "learning_rate": 3.20310745869635e-05, "loss": 0.4741, "step": 20490 }, { "epoch": 2.053287925076376, "grad_norm": 2.065915822982788, "learning_rate": 3.20159447915048e-05, "loss": 0.5215, "step": 20500 }, { "epoch": 2.0542895778033756, "grad_norm": 2.0788304805755615, "learning_rate": 3.200081220621154e-05, "loss": 0.5446, "step": 20510 }, { "epoch": 2.055291230530375, "grad_norm": 2.2969303131103516, "learning_rate": 3.1985676837101095e-05, "loss": 0.5335, "step": 20520 }, { "epoch": 2.056292883257375, "grad_norm": 1.6240125894546509, "learning_rate": 3.1970538690191914e-05, "loss": 0.5019, "step": 20530 }, { "epoch": 2.0572945359843744, "grad_norm": 2.1084020137786865, "learning_rate": 3.195539777150356e-05, "loss": 0.4362, "step": 20540 }, { "epoch": 2.058296188711374, "grad_norm": 1.850347876548767, "learning_rate": 3.194025408705671e-05, "loss": 0.4695, "step": 20550 }, { "epoch": 2.059297841438373, "grad_norm": 2.2284724712371826, "learning_rate": 3.192510764287311e-05, "loss": 0.4978, "step": 20560 }, { "epoch": 2.0602994941653727, "grad_norm": 2.4977900981903076, "learning_rate": 3.1909958444975636e-05, "loss": 0.5228, "step": 20570 }, { "epoch": 2.0613011468923723, "grad_norm": 3.1662728786468506, "learning_rate": 3.189480649938825e-05, "loss": 0.4964, "step": 20580 }, { "epoch": 2.062302799619372, "grad_norm": 1.9692362546920776, "learning_rate": 3.1879651812135995e-05, "loss": 0.4977, "step": 20590 }, { "epoch": 2.0633044523463715, "grad_norm": 1.8094409704208374, "learning_rate": 3.1864494389245016e-05, "loss": 0.4916, "step": 20600 }, { "epoch": 2.064306105073371, "grad_norm": 2.69331431388855, "learning_rate": 3.184933423674254e-05, "loss": 0.4704, "step": 20610 }, { "epoch": 2.0653077578003707, "grad_norm": 1.9121180772781372, "learning_rate": 3.183417136065686e-05, "loss": 0.5054, "step": 20620 }, { "epoch": 2.0663094105273703, "grad_norm": 1.9425513744354248, "learning_rate": 3.1819005767017406e-05, "loss": 0.437, "step": 20630 }, { "epoch": 2.06731106325437, "grad_norm": 2.331561326980591, "learning_rate": 3.180383746185464e-05, "loss": 0.4489, "step": 20640 }, { "epoch": 2.0683127159813695, "grad_norm": 2.1345226764678955, "learning_rate": 3.178866645120012e-05, "loss": 0.488, "step": 20650 }, { "epoch": 2.0693143687083686, "grad_norm": 2.7039108276367188, "learning_rate": 3.1773492741086474e-05, "loss": 0.4713, "step": 20660 }, { "epoch": 2.070316021435368, "grad_norm": 2.335880756378174, "learning_rate": 3.1758316337547414e-05, "loss": 0.5457, "step": 20670 }, { "epoch": 2.071317674162368, "grad_norm": 1.7798826694488525, "learning_rate": 3.1743137246617714e-05, "loss": 0.5122, "step": 20680 }, { "epoch": 2.0723193268893674, "grad_norm": 1.9185101985931396, "learning_rate": 3.172795547433321e-05, "loss": 0.5018, "step": 20690 }, { "epoch": 2.073320979616367, "grad_norm": 2.214864730834961, "learning_rate": 3.171277102673082e-05, "loss": 0.4785, "step": 20700 }, { "epoch": 2.0743226323433666, "grad_norm": 2.6873626708984375, "learning_rate": 3.169758390984852e-05, "loss": 0.4523, "step": 20710 }, { "epoch": 2.075324285070366, "grad_norm": 1.6275378465652466, "learning_rate": 3.168239412972534e-05, "loss": 0.5009, "step": 20720 }, { "epoch": 2.0763259377973657, "grad_norm": 1.7685691118240356, "learning_rate": 3.166720169240138e-05, "loss": 0.4612, "step": 20730 }, { "epoch": 2.0773275905243653, "grad_norm": 2.1459972858428955, "learning_rate": 3.165200660391779e-05, "loss": 0.441, "step": 20740 }, { "epoch": 2.078329243251365, "grad_norm": 2.075395107269287, "learning_rate": 3.1636808870316775e-05, "loss": 0.4499, "step": 20750 }, { "epoch": 2.0793308959783645, "grad_norm": 2.5253570079803467, "learning_rate": 3.1621608497641605e-05, "loss": 0.5045, "step": 20760 }, { "epoch": 2.0803325487053637, "grad_norm": 2.685187816619873, "learning_rate": 3.1606405491936566e-05, "loss": 0.5111, "step": 20770 }, { "epoch": 2.0813342014323633, "grad_norm": 2.0310208797454834, "learning_rate": 3.159119985924702e-05, "loss": 0.4889, "step": 20780 }, { "epoch": 2.082335854159363, "grad_norm": 2.0942888259887695, "learning_rate": 3.157599160561937e-05, "loss": 0.4823, "step": 20790 }, { "epoch": 2.0833375068863624, "grad_norm": 2.0800859928131104, "learning_rate": 3.156078073710105e-05, "loss": 0.4514, "step": 20800 }, { "epoch": 2.084339159613362, "grad_norm": 2.227480411529541, "learning_rate": 3.1545567259740554e-05, "loss": 0.4826, "step": 20810 }, { "epoch": 2.0853408123403616, "grad_norm": 2.061187744140625, "learning_rate": 3.153035117958739e-05, "loss": 0.4673, "step": 20820 }, { "epoch": 2.086342465067361, "grad_norm": 1.741713047027588, "learning_rate": 3.15151325026921e-05, "loss": 0.4825, "step": 20830 }, { "epoch": 2.087344117794361, "grad_norm": 4.484399795532227, "learning_rate": 3.14999112351063e-05, "loss": 0.5185, "step": 20840 }, { "epoch": 2.0883457705213604, "grad_norm": 1.8805603981018066, "learning_rate": 3.148468738288258e-05, "loss": 0.4914, "step": 20850 }, { "epoch": 2.08934742324836, "grad_norm": 2.6285297870635986, "learning_rate": 3.146946095207459e-05, "loss": 0.4523, "step": 20860 }, { "epoch": 2.090349075975359, "grad_norm": 2.502845525741577, "learning_rate": 3.1454231948737e-05, "loss": 0.4848, "step": 20870 }, { "epoch": 2.0913507287023587, "grad_norm": 2.0391061305999756, "learning_rate": 3.1439000378925496e-05, "loss": 0.5003, "step": 20880 }, { "epoch": 2.0923523814293583, "grad_norm": 2.1008460521698, "learning_rate": 3.1423766248696804e-05, "loss": 0.4715, "step": 20890 }, { "epoch": 2.093354034156358, "grad_norm": 2.340632200241089, "learning_rate": 3.1408529564108644e-05, "loss": 0.5281, "step": 20900 }, { "epoch": 2.0943556868833575, "grad_norm": 2.45475172996521, "learning_rate": 3.139329033121977e-05, "loss": 0.4497, "step": 20910 }, { "epoch": 2.095357339610357, "grad_norm": 1.7948698997497559, "learning_rate": 3.137804855608993e-05, "loss": 0.4581, "step": 20920 }, { "epoch": 2.0963589923373567, "grad_norm": 2.1844940185546875, "learning_rate": 3.1362804244779906e-05, "loss": 0.447, "step": 20930 }, { "epoch": 2.0973606450643563, "grad_norm": 2.17244553565979, "learning_rate": 3.134755740335147e-05, "loss": 0.5166, "step": 20940 }, { "epoch": 2.098362297791356, "grad_norm": 2.8408572673797607, "learning_rate": 3.133230803786741e-05, "loss": 0.4978, "step": 20950 }, { "epoch": 2.0993639505183554, "grad_norm": 2.3953614234924316, "learning_rate": 3.131705615439152e-05, "loss": 0.4383, "step": 20960 }, { "epoch": 2.100365603245355, "grad_norm": 1.9969464540481567, "learning_rate": 3.130180175898857e-05, "loss": 0.4647, "step": 20970 }, { "epoch": 2.101367255972354, "grad_norm": 1.826015591621399, "learning_rate": 3.1286544857724366e-05, "loss": 0.4327, "step": 20980 }, { "epoch": 2.1023689086993538, "grad_norm": 2.493840217590332, "learning_rate": 3.1271285456665696e-05, "loss": 0.4594, "step": 20990 }, { "epoch": 2.1033705614263534, "grad_norm": 2.08193039894104, "learning_rate": 3.125602356188032e-05, "loss": 0.4489, "step": 21000 }, { "epoch": 2.104372214153353, "grad_norm": 2.3533523082733154, "learning_rate": 3.124075917943704e-05, "loss": 0.4333, "step": 21010 }, { "epoch": 2.1053738668803526, "grad_norm": 2.025791645050049, "learning_rate": 3.122549231540558e-05, "loss": 0.4217, "step": 21020 }, { "epoch": 2.106375519607352, "grad_norm": 2.909153699874878, "learning_rate": 3.121022297585672e-05, "loss": 0.5087, "step": 21030 }, { "epoch": 2.1073771723343517, "grad_norm": 3.3912322521209717, "learning_rate": 3.1194951166862165e-05, "loss": 0.4623, "step": 21040 }, { "epoch": 2.1083788250613513, "grad_norm": 2.3475584983825684, "learning_rate": 3.117967689449464e-05, "loss": 0.5099, "step": 21050 }, { "epoch": 2.109380477788351, "grad_norm": 2.320850133895874, "learning_rate": 3.116440016482784e-05, "loss": 0.4373, "step": 21060 }, { "epoch": 2.1103821305153505, "grad_norm": 1.9819415807724, "learning_rate": 3.1149120983936434e-05, "loss": 0.469, "step": 21070 }, { "epoch": 2.11138378324235, "grad_norm": 1.4946792125701904, "learning_rate": 3.1133839357896055e-05, "loss": 0.4708, "step": 21080 }, { "epoch": 2.1123854359693492, "grad_norm": 2.409613847732544, "learning_rate": 3.1118555292783336e-05, "loss": 0.513, "step": 21090 }, { "epoch": 2.113387088696349, "grad_norm": 2.309664011001587, "learning_rate": 3.110326879467585e-05, "loss": 0.521, "step": 21100 }, { "epoch": 2.1143887414233484, "grad_norm": 1.7967244386672974, "learning_rate": 3.108797986965217e-05, "loss": 0.4797, "step": 21110 }, { "epoch": 2.115390394150348, "grad_norm": 2.344731330871582, "learning_rate": 3.107268852379179e-05, "loss": 0.5341, "step": 21120 }, { "epoch": 2.1163920468773476, "grad_norm": 1.8803640604019165, "learning_rate": 3.105739476317521e-05, "loss": 0.4447, "step": 21130 }, { "epoch": 2.117393699604347, "grad_norm": 2.1133954524993896, "learning_rate": 3.104209859388387e-05, "loss": 0.5363, "step": 21140 }, { "epoch": 2.118395352331347, "grad_norm": 2.5146517753601074, "learning_rate": 3.102680002200017e-05, "loss": 0.5577, "step": 21150 }, { "epoch": 2.1193970050583464, "grad_norm": 1.94539213180542, "learning_rate": 3.1011499053607445e-05, "loss": 0.4741, "step": 21160 }, { "epoch": 2.120398657785346, "grad_norm": 2.0953662395477295, "learning_rate": 3.0996195694790037e-05, "loss": 0.4667, "step": 21170 }, { "epoch": 2.1214003105123456, "grad_norm": 2.344572067260742, "learning_rate": 3.098088995163318e-05, "loss": 0.5503, "step": 21180 }, { "epoch": 2.1224019632393447, "grad_norm": 2.016023874282837, "learning_rate": 3.096558183022309e-05, "loss": 0.5102, "step": 21190 }, { "epoch": 2.1234036159663443, "grad_norm": 1.793877363204956, "learning_rate": 3.095027133664692e-05, "loss": 0.4622, "step": 21200 }, { "epoch": 2.124405268693344, "grad_norm": 1.8648747205734253, "learning_rate": 3.093495847699276e-05, "loss": 0.5166, "step": 21210 }, { "epoch": 2.1254069214203435, "grad_norm": 2.274259328842163, "learning_rate": 3.091964325734965e-05, "loss": 0.503, "step": 21220 }, { "epoch": 2.126408574147343, "grad_norm": 2.119114398956299, "learning_rate": 3.090432568380757e-05, "loss": 0.4296, "step": 21230 }, { "epoch": 2.1274102268743427, "grad_norm": 2.1176578998565674, "learning_rate": 3.088900576245742e-05, "loss": 0.4331, "step": 21240 }, { "epoch": 2.1284118796013423, "grad_norm": 2.4417192935943604, "learning_rate": 3.087368349939106e-05, "loss": 0.5897, "step": 21250 }, { "epoch": 2.129413532328342, "grad_norm": 2.3847012519836426, "learning_rate": 3.0858358900701254e-05, "loss": 0.4446, "step": 21260 }, { "epoch": 2.1304151850553414, "grad_norm": 2.534501791000366, "learning_rate": 3.0843031972481706e-05, "loss": 0.5521, "step": 21270 }, { "epoch": 2.131416837782341, "grad_norm": 2.3288180828094482, "learning_rate": 3.082770272082706e-05, "loss": 0.4715, "step": 21280 }, { "epoch": 2.13241849050934, "grad_norm": 2.3620693683624268, "learning_rate": 3.081237115183285e-05, "loss": 0.4759, "step": 21290 }, { "epoch": 2.1334201432363398, "grad_norm": 1.644264578819275, "learning_rate": 3.079703727159556e-05, "loss": 0.4878, "step": 21300 }, { "epoch": 2.1344217959633394, "grad_norm": 1.9679802656173706, "learning_rate": 3.07817010862126e-05, "loss": 0.4896, "step": 21310 }, { "epoch": 2.135423448690339, "grad_norm": 1.9164067506790161, "learning_rate": 3.0766362601782276e-05, "loss": 0.4766, "step": 21320 }, { "epoch": 2.1364251014173385, "grad_norm": 3.025406837463379, "learning_rate": 3.075102182440379e-05, "loss": 0.5287, "step": 21330 }, { "epoch": 2.137426754144338, "grad_norm": 2.551084280014038, "learning_rate": 3.073567876017732e-05, "loss": 0.5036, "step": 21340 }, { "epoch": 2.1384284068713377, "grad_norm": 1.9237477779388428, "learning_rate": 3.072033341520388e-05, "loss": 0.5097, "step": 21350 }, { "epoch": 2.1394300595983373, "grad_norm": 1.697118878364563, "learning_rate": 3.070498579558543e-05, "loss": 0.4359, "step": 21360 }, { "epoch": 2.140431712325337, "grad_norm": 1.9887810945510864, "learning_rate": 3.0689635907424845e-05, "loss": 0.4565, "step": 21370 }, { "epoch": 2.1414333650523365, "grad_norm": 2.0479846000671387, "learning_rate": 3.067428375682587e-05, "loss": 0.4848, "step": 21380 }, { "epoch": 2.142435017779336, "grad_norm": 1.8660268783569336, "learning_rate": 3.0658929349893174e-05, "loss": 0.4491, "step": 21390 }, { "epoch": 2.1434366705063352, "grad_norm": 2.399012327194214, "learning_rate": 3.0643572692732305e-05, "loss": 0.5697, "step": 21400 }, { "epoch": 2.144438323233335, "grad_norm": 2.653744697570801, "learning_rate": 3.062821379144973e-05, "loss": 0.5021, "step": 21410 }, { "epoch": 2.1454399759603344, "grad_norm": 2.3516993522644043, "learning_rate": 3.061285265215279e-05, "loss": 0.4619, "step": 21420 }, { "epoch": 2.146441628687334, "grad_norm": 2.4360647201538086, "learning_rate": 3.059748928094971e-05, "loss": 0.4099, "step": 21430 }, { "epoch": 2.1474432814143336, "grad_norm": 1.9208985567092896, "learning_rate": 3.058212368394962e-05, "loss": 0.4671, "step": 21440 }, { "epoch": 2.148444934141333, "grad_norm": 1.6549501419067383, "learning_rate": 3.056675586726252e-05, "loss": 0.5079, "step": 21450 }, { "epoch": 2.149446586868333, "grad_norm": 2.041652202606201, "learning_rate": 3.05513858369993e-05, "loss": 0.4768, "step": 21460 }, { "epoch": 2.1504482395953324, "grad_norm": 2.64847731590271, "learning_rate": 3.053601359927174e-05, "loss": 0.499, "step": 21470 }, { "epoch": 2.151449892322332, "grad_norm": 2.6299538612365723, "learning_rate": 3.0520639160192484e-05, "loss": 0.4828, "step": 21480 }, { "epoch": 2.1524515450493316, "grad_norm": 2.515505790710449, "learning_rate": 3.0505262525875045e-05, "loss": 0.4939, "step": 21490 }, { "epoch": 2.153453197776331, "grad_norm": 2.789552927017212, "learning_rate": 3.048988370243384e-05, "loss": 0.4844, "step": 21500 }, { "epoch": 2.1544548505033303, "grad_norm": 2.123347282409668, "learning_rate": 3.0474502695984113e-05, "loss": 0.5176, "step": 21510 }, { "epoch": 2.15545650323033, "grad_norm": 2.637828826904297, "learning_rate": 3.045911951264202e-05, "loss": 0.4876, "step": 21520 }, { "epoch": 2.1564581559573295, "grad_norm": 1.7636555433273315, "learning_rate": 3.0443734158524538e-05, "loss": 0.4821, "step": 21530 }, { "epoch": 2.157459808684329, "grad_norm": 2.0353643894195557, "learning_rate": 3.042834663974955e-05, "loss": 0.4496, "step": 21540 }, { "epoch": 2.1584614614113287, "grad_norm": 1.786335825920105, "learning_rate": 3.0412956962435773e-05, "loss": 0.4764, "step": 21550 }, { "epoch": 2.1594631141383283, "grad_norm": 1.59462571144104, "learning_rate": 3.039756513270278e-05, "loss": 0.4608, "step": 21560 }, { "epoch": 2.160464766865328, "grad_norm": 2.1465108394622803, "learning_rate": 3.0382171156671028e-05, "loss": 0.5007, "step": 21570 }, { "epoch": 2.1614664195923274, "grad_norm": 1.9091776609420776, "learning_rate": 3.0366775040461802e-05, "loss": 0.5191, "step": 21580 }, { "epoch": 2.162468072319327, "grad_norm": 2.044081449508667, "learning_rate": 3.035137679019724e-05, "loss": 0.4763, "step": 21590 }, { "epoch": 2.1634697250463266, "grad_norm": 1.9604840278625488, "learning_rate": 3.033597641200034e-05, "loss": 0.4886, "step": 21600 }, { "epoch": 2.164471377773326, "grad_norm": 2.2669661045074463, "learning_rate": 3.032057391199493e-05, "loss": 0.4859, "step": 21610 }, { "epoch": 2.1654730305003254, "grad_norm": 2.0096001625061035, "learning_rate": 3.0305169296305695e-05, "loss": 0.5297, "step": 21620 }, { "epoch": 2.166474683227325, "grad_norm": 2.0376088619232178, "learning_rate": 3.0289762571058167e-05, "loss": 0.47, "step": 21630 }, { "epoch": 2.1674763359543245, "grad_norm": 2.465101718902588, "learning_rate": 3.0274353742378697e-05, "loss": 0.5087, "step": 21640 }, { "epoch": 2.168477988681324, "grad_norm": 1.4533342123031616, "learning_rate": 3.0258942816394492e-05, "loss": 0.52, "step": 21650 }, { "epoch": 2.1694796414083237, "grad_norm": 1.972916841506958, "learning_rate": 3.0243529799233572e-05, "loss": 0.4282, "step": 21660 }, { "epoch": 2.1704812941353233, "grad_norm": 1.6645925045013428, "learning_rate": 3.0228114697024813e-05, "loss": 0.4742, "step": 21670 }, { "epoch": 2.171482946862323, "grad_norm": 2.133258581161499, "learning_rate": 3.0212697515897893e-05, "loss": 0.5186, "step": 21680 }, { "epoch": 2.1724845995893225, "grad_norm": 1.7237110137939453, "learning_rate": 3.0197278261983343e-05, "loss": 0.4855, "step": 21690 }, { "epoch": 2.173486252316322, "grad_norm": 1.968004822731018, "learning_rate": 3.0181856941412512e-05, "loss": 0.4972, "step": 21700 }, { "epoch": 2.1744879050433217, "grad_norm": 1.9006356000900269, "learning_rate": 3.0166433560317543e-05, "loss": 0.4153, "step": 21710 }, { "epoch": 2.175489557770321, "grad_norm": 2.670501708984375, "learning_rate": 3.0151008124831438e-05, "loss": 0.5024, "step": 21720 }, { "epoch": 2.1764912104973204, "grad_norm": 1.9438066482543945, "learning_rate": 3.0135580641088e-05, "loss": 0.4693, "step": 21730 }, { "epoch": 2.17749286322432, "grad_norm": 2.635740280151367, "learning_rate": 3.0120151115221834e-05, "loss": 0.5429, "step": 21740 }, { "epoch": 2.1784945159513196, "grad_norm": 1.8196395635604858, "learning_rate": 3.010471955336838e-05, "loss": 0.4438, "step": 21750 }, { "epoch": 2.179496168678319, "grad_norm": 2.196422576904297, "learning_rate": 3.008928596166386e-05, "loss": 0.5454, "step": 21760 }, { "epoch": 2.180497821405319, "grad_norm": 2.2442145347595215, "learning_rate": 3.0073850346245337e-05, "loss": 0.4403, "step": 21770 }, { "epoch": 2.1814994741323184, "grad_norm": 1.8484165668487549, "learning_rate": 3.005841271325065e-05, "loss": 0.4638, "step": 21780 }, { "epoch": 2.182501126859318, "grad_norm": 1.8003365993499756, "learning_rate": 3.004297306881845e-05, "loss": 0.4501, "step": 21790 }, { "epoch": 2.1835027795863176, "grad_norm": 2.756234884262085, "learning_rate": 3.002753141908819e-05, "loss": 0.4455, "step": 21800 }, { "epoch": 2.184504432313317, "grad_norm": 1.779693841934204, "learning_rate": 3.001208777020012e-05, "loss": 0.4788, "step": 21810 }, { "epoch": 2.1855060850403163, "grad_norm": 1.9188098907470703, "learning_rate": 2.999664212829528e-05, "loss": 0.4916, "step": 21820 }, { "epoch": 2.186507737767316, "grad_norm": 2.3162620067596436, "learning_rate": 2.998119449951552e-05, "loss": 0.4194, "step": 21830 }, { "epoch": 2.1875093904943155, "grad_norm": 2.515458822250366, "learning_rate": 2.996574489000345e-05, "loss": 0.4606, "step": 21840 }, { "epoch": 2.188511043221315, "grad_norm": 1.8557404279708862, "learning_rate": 2.9950293305902494e-05, "loss": 0.5494, "step": 21850 }, { "epoch": 2.1895126959483147, "grad_norm": 1.8735612630844116, "learning_rate": 2.9934839753356846e-05, "loss": 0.4404, "step": 21860 }, { "epoch": 2.1905143486753142, "grad_norm": 1.6917238235473633, "learning_rate": 2.9919384238511484e-05, "loss": 0.5173, "step": 21870 }, { "epoch": 2.191516001402314, "grad_norm": 1.8293862342834473, "learning_rate": 2.990392676751219e-05, "loss": 0.4723, "step": 21880 }, { "epoch": 2.1925176541293134, "grad_norm": 1.5702310800552368, "learning_rate": 2.9888467346505478e-05, "loss": 0.4483, "step": 21890 }, { "epoch": 2.193519306856313, "grad_norm": 1.7046412229537964, "learning_rate": 2.987300598163868e-05, "loss": 0.4452, "step": 21900 }, { "epoch": 2.1945209595833126, "grad_norm": 2.7863757610321045, "learning_rate": 2.985754267905989e-05, "loss": 0.574, "step": 21910 }, { "epoch": 2.195522612310312, "grad_norm": 2.4093217849731445, "learning_rate": 2.9842077444917944e-05, "loss": 0.4968, "step": 21920 }, { "epoch": 2.1965242650373114, "grad_norm": 2.1676666736602783, "learning_rate": 2.9826610285362494e-05, "loss": 0.4581, "step": 21930 }, { "epoch": 2.197525917764311, "grad_norm": 2.1943066120147705, "learning_rate": 2.9811141206543914e-05, "loss": 0.4902, "step": 21940 }, { "epoch": 2.1985275704913105, "grad_norm": 2.6076924800872803, "learning_rate": 2.979567021461337e-05, "loss": 0.5178, "step": 21950 }, { "epoch": 2.19952922321831, "grad_norm": 1.8468619585037231, "learning_rate": 2.978019731572278e-05, "loss": 0.4675, "step": 21960 }, { "epoch": 2.2005308759453097, "grad_norm": 2.3149871826171875, "learning_rate": 2.976472251602481e-05, "loss": 0.4665, "step": 21970 }, { "epoch": 2.2015325286723093, "grad_norm": 2.723015308380127, "learning_rate": 2.97492458216729e-05, "loss": 0.4283, "step": 21980 }, { "epoch": 2.202534181399309, "grad_norm": 2.4012863636016846, "learning_rate": 2.9733767238821224e-05, "loss": 0.5706, "step": 21990 }, { "epoch": 2.2035358341263085, "grad_norm": 1.7116522789001465, "learning_rate": 2.9718286773624733e-05, "loss": 0.5375, "step": 22000 }, { "epoch": 2.204537486853308, "grad_norm": 2.074054002761841, "learning_rate": 2.9702804432239094e-05, "loss": 0.4885, "step": 22010 }, { "epoch": 2.2055391395803077, "grad_norm": 2.2811458110809326, "learning_rate": 2.9687320220820748e-05, "loss": 0.5012, "step": 22020 }, { "epoch": 2.2065407923073073, "grad_norm": 1.5797665119171143, "learning_rate": 2.967183414552686e-05, "loss": 0.4347, "step": 22030 }, { "epoch": 2.2075424450343064, "grad_norm": 1.8701937198638916, "learning_rate": 2.965634621251535e-05, "loss": 0.5473, "step": 22040 }, { "epoch": 2.208544097761306, "grad_norm": 2.81683349609375, "learning_rate": 2.9640856427944863e-05, "loss": 0.511, "step": 22050 }, { "epoch": 2.2095457504883056, "grad_norm": 4.241177082061768, "learning_rate": 2.962536479797481e-05, "loss": 0.4851, "step": 22060 }, { "epoch": 2.210547403215305, "grad_norm": 2.56923246383667, "learning_rate": 2.9609871328765288e-05, "loss": 0.5112, "step": 22070 }, { "epoch": 2.2115490559423048, "grad_norm": 1.703288197517395, "learning_rate": 2.9594376026477173e-05, "loss": 0.4989, "step": 22080 }, { "epoch": 2.2125507086693044, "grad_norm": 2.1868696212768555, "learning_rate": 2.9578878897272022e-05, "loss": 0.4711, "step": 22090 }, { "epoch": 2.213552361396304, "grad_norm": 2.30442214012146, "learning_rate": 2.9563379947312176e-05, "loss": 0.5014, "step": 22100 }, { "epoch": 2.2145540141233035, "grad_norm": 1.7584575414657593, "learning_rate": 2.954787918276065e-05, "loss": 0.4397, "step": 22110 }, { "epoch": 2.215555666850303, "grad_norm": 2.35834002494812, "learning_rate": 2.9532376609781197e-05, "loss": 0.4921, "step": 22120 }, { "epoch": 2.2165573195773027, "grad_norm": 2.399610757827759, "learning_rate": 2.9516872234538306e-05, "loss": 0.4985, "step": 22130 }, { "epoch": 2.2175589723043023, "grad_norm": 2.3085238933563232, "learning_rate": 2.950136606319716e-05, "loss": 0.5122, "step": 22140 }, { "epoch": 2.2185606250313015, "grad_norm": 1.8348264694213867, "learning_rate": 2.9485858101923664e-05, "loss": 0.4883, "step": 22150 }, { "epoch": 2.219562277758301, "grad_norm": 1.838392734527588, "learning_rate": 2.9470348356884436e-05, "loss": 0.4828, "step": 22160 }, { "epoch": 2.2205639304853007, "grad_norm": 2.5261988639831543, "learning_rate": 2.945483683424681e-05, "loss": 0.4862, "step": 22170 }, { "epoch": 2.2215655832123002, "grad_norm": 2.360555410385132, "learning_rate": 2.9439323540178803e-05, "loss": 0.4755, "step": 22180 }, { "epoch": 2.2225672359393, "grad_norm": 1.8836420774459839, "learning_rate": 2.9423808480849162e-05, "loss": 0.5563, "step": 22190 }, { "epoch": 2.2235688886662994, "grad_norm": 2.629767417907715, "learning_rate": 2.9408291662427334e-05, "loss": 0.4914, "step": 22200 }, { "epoch": 2.224570541393299, "grad_norm": 1.7553353309631348, "learning_rate": 2.939277309108345e-05, "loss": 0.4442, "step": 22210 }, { "epoch": 2.2255721941202986, "grad_norm": 1.9337244033813477, "learning_rate": 2.937725277298834e-05, "loss": 0.4718, "step": 22220 }, { "epoch": 2.226573846847298, "grad_norm": 2.2966341972351074, "learning_rate": 2.936173071431354e-05, "loss": 0.4718, "step": 22230 }, { "epoch": 2.227575499574298, "grad_norm": 1.795206069946289, "learning_rate": 2.934620692123129e-05, "loss": 0.4941, "step": 22240 }, { "epoch": 2.228577152301297, "grad_norm": 2.291245698928833, "learning_rate": 2.933068139991447e-05, "loss": 0.47, "step": 22250 }, { "epoch": 2.2295788050282965, "grad_norm": 2.183180570602417, "learning_rate": 2.931515415653671e-05, "loss": 0.4611, "step": 22260 }, { "epoch": 2.230580457755296, "grad_norm": 2.6957666873931885, "learning_rate": 2.9299625197272274e-05, "loss": 0.5005, "step": 22270 }, { "epoch": 2.2315821104822957, "grad_norm": 2.4472970962524414, "learning_rate": 2.9284094528296147e-05, "loss": 0.5073, "step": 22280 }, { "epoch": 2.2325837632092953, "grad_norm": 2.379964590072632, "learning_rate": 2.9268562155783957e-05, "loss": 0.4925, "step": 22290 }, { "epoch": 2.233585415936295, "grad_norm": 2.3397207260131836, "learning_rate": 2.9253028085912044e-05, "loss": 0.4374, "step": 22300 }, { "epoch": 2.2345870686632945, "grad_norm": 1.8619310855865479, "learning_rate": 2.9237492324857397e-05, "loss": 0.4889, "step": 22310 }, { "epoch": 2.235588721390294, "grad_norm": 1.8362969160079956, "learning_rate": 2.9221954878797686e-05, "loss": 0.43, "step": 22320 }, { "epoch": 2.2365903741172937, "grad_norm": 3.204434633255005, "learning_rate": 2.9206415753911266e-05, "loss": 0.5093, "step": 22330 }, { "epoch": 2.2375920268442933, "grad_norm": 2.4659273624420166, "learning_rate": 2.9190874956377136e-05, "loss": 0.4225, "step": 22340 }, { "epoch": 2.2385936795712924, "grad_norm": 1.955161213874817, "learning_rate": 2.9175332492374963e-05, "loss": 0.5238, "step": 22350 }, { "epoch": 2.239595332298292, "grad_norm": 1.948196291923523, "learning_rate": 2.91597883680851e-05, "loss": 0.4562, "step": 22360 }, { "epoch": 2.2405969850252916, "grad_norm": 2.1601054668426514, "learning_rate": 2.9144242589688535e-05, "loss": 0.4433, "step": 22370 }, { "epoch": 2.241598637752291, "grad_norm": 2.4899299144744873, "learning_rate": 2.9128695163366927e-05, "loss": 0.4281, "step": 22380 }, { "epoch": 2.2426002904792908, "grad_norm": 2.4548087120056152, "learning_rate": 2.9113146095302584e-05, "loss": 0.5069, "step": 22390 }, { "epoch": 2.2436019432062904, "grad_norm": 2.362183094024658, "learning_rate": 2.9097595391678467e-05, "loss": 0.4856, "step": 22400 }, { "epoch": 2.24460359593329, "grad_norm": 2.1862354278564453, "learning_rate": 2.9082043058678192e-05, "loss": 0.4262, "step": 22410 }, { "epoch": 2.2456052486602895, "grad_norm": 2.056276559829712, "learning_rate": 2.9066489102486023e-05, "loss": 0.4686, "step": 22420 }, { "epoch": 2.246606901387289, "grad_norm": 2.3078675270080566, "learning_rate": 2.9050933529286857e-05, "loss": 0.5046, "step": 22430 }, { "epoch": 2.2476085541142887, "grad_norm": 2.5429131984710693, "learning_rate": 2.9035376345266256e-05, "loss": 0.5805, "step": 22440 }, { "epoch": 2.2486102068412883, "grad_norm": 2.4503445625305176, "learning_rate": 2.90198175566104e-05, "loss": 0.4483, "step": 22450 }, { "epoch": 2.2496118595682875, "grad_norm": 2.3516530990600586, "learning_rate": 2.9004257169506128e-05, "loss": 0.4998, "step": 22460 }, { "epoch": 2.250613512295287, "grad_norm": 2.0752251148223877, "learning_rate": 2.898869519014089e-05, "loss": 0.4373, "step": 22470 }, { "epoch": 2.2516151650222866, "grad_norm": 1.8537724018096924, "learning_rate": 2.8973131624702798e-05, "loss": 0.4856, "step": 22480 }, { "epoch": 2.2526168177492862, "grad_norm": 1.905326247215271, "learning_rate": 2.8957566479380576e-05, "loss": 0.49, "step": 22490 }, { "epoch": 2.253618470476286, "grad_norm": 1.725106954574585, "learning_rate": 2.8941999760363563e-05, "loss": 0.4265, "step": 22500 }, { "epoch": 2.2546201232032854, "grad_norm": 2.606820821762085, "learning_rate": 2.8926431473841763e-05, "loss": 0.4936, "step": 22510 }, { "epoch": 2.255621775930285, "grad_norm": 2.2708587646484375, "learning_rate": 2.8910861626005776e-05, "loss": 0.4706, "step": 22520 }, { "epoch": 2.2566234286572846, "grad_norm": 1.8929194211959839, "learning_rate": 2.889529022304682e-05, "loss": 0.4891, "step": 22530 }, { "epoch": 2.257625081384284, "grad_norm": 2.43183970451355, "learning_rate": 2.887971727115674e-05, "loss": 0.5013, "step": 22540 }, { "epoch": 2.258626734111284, "grad_norm": 1.686383605003357, "learning_rate": 2.8864142776528007e-05, "loss": 0.4561, "step": 22550 }, { "epoch": 2.2596283868382834, "grad_norm": 1.827078104019165, "learning_rate": 2.8848566745353683e-05, "loss": 0.423, "step": 22560 }, { "epoch": 2.2606300395652825, "grad_norm": 2.1075212955474854, "learning_rate": 2.8832989183827464e-05, "loss": 0.4196, "step": 22570 }, { "epoch": 2.261631692292282, "grad_norm": 2.4093353748321533, "learning_rate": 2.881741009814363e-05, "loss": 0.5551, "step": 22580 }, { "epoch": 2.2626333450192817, "grad_norm": 2.049708843231201, "learning_rate": 2.8801829494497095e-05, "loss": 0.4111, "step": 22590 }, { "epoch": 2.2636349977462813, "grad_norm": 2.1682515144348145, "learning_rate": 2.878624737908335e-05, "loss": 0.516, "step": 22600 }, { "epoch": 2.264636650473281, "grad_norm": 2.5212790966033936, "learning_rate": 2.8770663758098494e-05, "loss": 0.4576, "step": 22610 }, { "epoch": 2.2656383032002805, "grad_norm": 2.4226863384246826, "learning_rate": 2.8755078637739253e-05, "loss": 0.5454, "step": 22620 }, { "epoch": 2.26663995592728, "grad_norm": 1.4106111526489258, "learning_rate": 2.8739492024202895e-05, "loss": 0.3773, "step": 22630 }, { "epoch": 2.2676416086542797, "grad_norm": 2.756021022796631, "learning_rate": 2.8723903923687345e-05, "loss": 0.4958, "step": 22640 }, { "epoch": 2.2686432613812793, "grad_norm": 2.1808178424835205, "learning_rate": 2.8708314342391062e-05, "loss": 0.4745, "step": 22650 }, { "epoch": 2.269644914108279, "grad_norm": 2.1560256481170654, "learning_rate": 2.8692723286513133e-05, "loss": 0.465, "step": 22660 }, { "epoch": 2.2706465668352784, "grad_norm": 2.243764638900757, "learning_rate": 2.8677130762253212e-05, "loss": 0.4736, "step": 22670 }, { "epoch": 2.2716482195622776, "grad_norm": 2.5886261463165283, "learning_rate": 2.8661536775811542e-05, "loss": 0.4174, "step": 22680 }, { "epoch": 2.272649872289277, "grad_norm": 2.1652519702911377, "learning_rate": 2.8645941333388948e-05, "loss": 0.5013, "step": 22690 }, { "epoch": 2.2736515250162768, "grad_norm": 2.6116833686828613, "learning_rate": 2.863034444118683e-05, "loss": 0.4973, "step": 22700 }, { "epoch": 2.2746531777432764, "grad_norm": 2.0481340885162354, "learning_rate": 2.8614746105407177e-05, "loss": 0.4399, "step": 22710 }, { "epoch": 2.275654830470276, "grad_norm": 2.0342276096343994, "learning_rate": 2.859914633225253e-05, "loss": 0.5249, "step": 22720 }, { "epoch": 2.2766564831972755, "grad_norm": 2.232968330383301, "learning_rate": 2.8583545127926025e-05, "loss": 0.4847, "step": 22730 }, { "epoch": 2.277658135924275, "grad_norm": 3.0498735904693604, "learning_rate": 2.856794249863135e-05, "loss": 0.5188, "step": 22740 }, { "epoch": 2.2786597886512747, "grad_norm": 1.7310694456100464, "learning_rate": 2.8552338450572768e-05, "loss": 0.4896, "step": 22750 }, { "epoch": 2.2796614413782743, "grad_norm": 2.2194247245788574, "learning_rate": 2.8536732989955105e-05, "loss": 0.4247, "step": 22760 }, { "epoch": 2.2806630941052735, "grad_norm": 1.4236329793930054, "learning_rate": 2.852112612298376e-05, "loss": 0.4623, "step": 22770 }, { "epoch": 2.2816647468322735, "grad_norm": 1.7755547761917114, "learning_rate": 2.850551785586466e-05, "loss": 0.4402, "step": 22780 }, { "epoch": 2.2826663995592726, "grad_norm": 2.213015079498291, "learning_rate": 2.848990819480431e-05, "loss": 0.4715, "step": 22790 }, { "epoch": 2.2836680522862722, "grad_norm": 1.86104154586792, "learning_rate": 2.847429714600979e-05, "loss": 0.4849, "step": 22800 }, { "epoch": 2.284669705013272, "grad_norm": 2.5028235912323, "learning_rate": 2.8458684715688676e-05, "loss": 0.5567, "step": 22810 }, { "epoch": 2.2856713577402714, "grad_norm": 2.2891948223114014, "learning_rate": 2.8443070910049153e-05, "loss": 0.5308, "step": 22820 }, { "epoch": 2.286673010467271, "grad_norm": 2.1193156242370605, "learning_rate": 2.8427455735299908e-05, "loss": 0.5333, "step": 22830 }, { "epoch": 2.2876746631942706, "grad_norm": 1.979323148727417, "learning_rate": 2.841183919765021e-05, "loss": 0.4608, "step": 22840 }, { "epoch": 2.28867631592127, "grad_norm": 2.817578077316284, "learning_rate": 2.8396221303309832e-05, "loss": 0.3857, "step": 22850 }, { "epoch": 2.28967796864827, "grad_norm": 2.252032518386841, "learning_rate": 2.8380602058489115e-05, "loss": 0.5232, "step": 22860 }, { "epoch": 2.2906796213752694, "grad_norm": 4.845706939697266, "learning_rate": 2.8364981469398925e-05, "loss": 0.4782, "step": 22870 }, { "epoch": 2.2916812741022685, "grad_norm": 2.3078505992889404, "learning_rate": 2.834935954225067e-05, "loss": 0.4634, "step": 22880 }, { "epoch": 2.292682926829268, "grad_norm": 2.475879192352295, "learning_rate": 2.8333736283256272e-05, "loss": 0.4872, "step": 22890 }, { "epoch": 2.2936845795562677, "grad_norm": 2.6143455505371094, "learning_rate": 2.8318111698628214e-05, "loss": 0.4772, "step": 22900 }, { "epoch": 2.2946862322832673, "grad_norm": 2.3077473640441895, "learning_rate": 2.830248579457947e-05, "loss": 0.4862, "step": 22910 }, { "epoch": 2.295687885010267, "grad_norm": 1.7800365686416626, "learning_rate": 2.8286858577323566e-05, "loss": 0.4625, "step": 22920 }, { "epoch": 2.2966895377372665, "grad_norm": 2.5691211223602295, "learning_rate": 2.827123005307454e-05, "loss": 0.4215, "step": 22930 }, { "epoch": 2.297691190464266, "grad_norm": 2.73599910736084, "learning_rate": 2.8255600228046935e-05, "loss": 0.4423, "step": 22940 }, { "epoch": 2.2986928431912657, "grad_norm": 1.8676958084106445, "learning_rate": 2.8239969108455856e-05, "loss": 0.4174, "step": 22950 }, { "epoch": 2.2996944959182652, "grad_norm": 1.835666537284851, "learning_rate": 2.8224336700516873e-05, "loss": 0.4988, "step": 22960 }, { "epoch": 2.300696148645265, "grad_norm": 2.497913122177124, "learning_rate": 2.8208703010446086e-05, "loss": 0.5001, "step": 22970 }, { "epoch": 2.3016978013722644, "grad_norm": 2.3494608402252197, "learning_rate": 2.8193068044460118e-05, "loss": 0.4715, "step": 22980 }, { "epoch": 2.3026994540992636, "grad_norm": 1.8175679445266724, "learning_rate": 2.8177431808776078e-05, "loss": 0.4408, "step": 22990 }, { "epoch": 2.303701106826263, "grad_norm": 2.166266918182373, "learning_rate": 2.8161794309611612e-05, "loss": 0.3983, "step": 23000 }, { "epoch": 2.3047027595532628, "grad_norm": 1.8629862070083618, "learning_rate": 2.8146155553184812e-05, "loss": 0.4278, "step": 23010 }, { "epoch": 2.3057044122802623, "grad_norm": 2.0270631313323975, "learning_rate": 2.813051554571433e-05, "loss": 0.5, "step": 23020 }, { "epoch": 2.306706065007262, "grad_norm": 2.0581462383270264, "learning_rate": 2.811487429341929e-05, "loss": 0.4549, "step": 23030 }, { "epoch": 2.3077077177342615, "grad_norm": 2.967930793762207, "learning_rate": 2.8099231802519304e-05, "loss": 0.5308, "step": 23040 }, { "epoch": 2.308709370461261, "grad_norm": 1.926661491394043, "learning_rate": 2.8083588079234485e-05, "loss": 0.4796, "step": 23050 }, { "epoch": 2.3097110231882607, "grad_norm": 2.6114249229431152, "learning_rate": 2.806794312978544e-05, "loss": 0.5267, "step": 23060 }, { "epoch": 2.3107126759152603, "grad_norm": 1.7948778867721558, "learning_rate": 2.805229696039325e-05, "loss": 0.461, "step": 23070 }, { "epoch": 2.31171432864226, "grad_norm": 1.9823604822158813, "learning_rate": 2.8036649577279495e-05, "loss": 0.4477, "step": 23080 }, { "epoch": 2.3127159813692595, "grad_norm": 2.148059844970703, "learning_rate": 2.8021000986666235e-05, "loss": 0.4411, "step": 23090 }, { "epoch": 2.3137176340962586, "grad_norm": 2.309586763381958, "learning_rate": 2.8005351194775996e-05, "loss": 0.5383, "step": 23100 }, { "epoch": 2.3147192868232582, "grad_norm": 2.083111047744751, "learning_rate": 2.7989700207831803e-05, "loss": 0.4957, "step": 23110 }, { "epoch": 2.315720939550258, "grad_norm": 2.7374930381774902, "learning_rate": 2.7974048032057142e-05, "loss": 0.466, "step": 23120 }, { "epoch": 2.3167225922772574, "grad_norm": 2.315837860107422, "learning_rate": 2.7958394673675986e-05, "loss": 0.5647, "step": 23130 }, { "epoch": 2.317724245004257, "grad_norm": 2.1665873527526855, "learning_rate": 2.7942740138912748e-05, "loss": 0.47, "step": 23140 }, { "epoch": 2.3187258977312566, "grad_norm": 2.3246545791625977, "learning_rate": 2.7927084433992355e-05, "loss": 0.4727, "step": 23150 }, { "epoch": 2.319727550458256, "grad_norm": 2.2014048099517822, "learning_rate": 2.791142756514015e-05, "loss": 0.4729, "step": 23160 }, { "epoch": 2.3207292031852558, "grad_norm": 2.3125619888305664, "learning_rate": 2.7895769538581973e-05, "loss": 0.4782, "step": 23170 }, { "epoch": 2.3217308559122554, "grad_norm": 1.8085143566131592, "learning_rate": 2.788011036054412e-05, "loss": 0.4332, "step": 23180 }, { "epoch": 2.322732508639255, "grad_norm": 2.0354082584381104, "learning_rate": 2.7864450037253316e-05, "loss": 0.483, "step": 23190 }, { "epoch": 2.3237341613662545, "grad_norm": 1.992011308670044, "learning_rate": 2.7848788574936787e-05, "loss": 0.4718, "step": 23200 }, { "epoch": 2.3247358140932537, "grad_norm": 2.2824182510375977, "learning_rate": 2.783312597982218e-05, "loss": 0.5439, "step": 23210 }, { "epoch": 2.3257374668202533, "grad_norm": 2.152175188064575, "learning_rate": 2.78174622581376e-05, "loss": 0.4938, "step": 23220 }, { "epoch": 2.326739119547253, "grad_norm": 1.9049335718154907, "learning_rate": 2.7801797416111597e-05, "loss": 0.5244, "step": 23230 }, { "epoch": 2.3277407722742525, "grad_norm": 1.704810619354248, "learning_rate": 2.7786131459973185e-05, "loss": 0.4052, "step": 23240 }, { "epoch": 2.328742425001252, "grad_norm": 1.6601033210754395, "learning_rate": 2.7770464395951788e-05, "loss": 0.4639, "step": 23250 }, { "epoch": 2.3297440777282516, "grad_norm": 2.091416597366333, "learning_rate": 2.7754796230277307e-05, "loss": 0.4594, "step": 23260 }, { "epoch": 2.3307457304552512, "grad_norm": 2.0445423126220703, "learning_rate": 2.7739126969180053e-05, "loss": 0.4626, "step": 23270 }, { "epoch": 2.331747383182251, "grad_norm": 2.797914981842041, "learning_rate": 2.7723456618890782e-05, "loss": 0.5565, "step": 23280 }, { "epoch": 2.3327490359092504, "grad_norm": 2.423527956008911, "learning_rate": 2.770778518564069e-05, "loss": 0.5019, "step": 23290 }, { "epoch": 2.3337506886362496, "grad_norm": 2.7250008583068848, "learning_rate": 2.7692112675661385e-05, "loss": 0.4707, "step": 23300 }, { "epoch": 2.3347523413632496, "grad_norm": 2.50681734085083, "learning_rate": 2.7676439095184936e-05, "loss": 0.4785, "step": 23310 }, { "epoch": 2.3357539940902488, "grad_norm": 2.4695465564727783, "learning_rate": 2.76607644504438e-05, "loss": 0.4693, "step": 23320 }, { "epoch": 2.3367556468172483, "grad_norm": 2.140570640563965, "learning_rate": 2.7645088747670885e-05, "loss": 0.4847, "step": 23330 }, { "epoch": 2.337757299544248, "grad_norm": 1.7873948812484741, "learning_rate": 2.7629411993099497e-05, "loss": 0.4984, "step": 23340 }, { "epoch": 2.3387589522712475, "grad_norm": 2.5162179470062256, "learning_rate": 2.7613734192963385e-05, "loss": 0.5601, "step": 23350 }, { "epoch": 2.339760604998247, "grad_norm": 1.9141145944595337, "learning_rate": 2.7598055353496705e-05, "loss": 0.4532, "step": 23360 }, { "epoch": 2.3407622577252467, "grad_norm": 2.3677682876586914, "learning_rate": 2.7582375480934e-05, "loss": 0.4797, "step": 23370 }, { "epoch": 2.3417639104522463, "grad_norm": 2.3838820457458496, "learning_rate": 2.7566694581510272e-05, "loss": 0.4768, "step": 23380 }, { "epoch": 2.342765563179246, "grad_norm": 1.4900152683258057, "learning_rate": 2.7551012661460897e-05, "loss": 0.4558, "step": 23390 }, { "epoch": 2.3437672159062455, "grad_norm": 2.4450631141662598, "learning_rate": 2.7535329727021653e-05, "loss": 0.4752, "step": 23400 }, { "epoch": 2.3447688686332446, "grad_norm": 2.6570770740509033, "learning_rate": 2.751964578442875e-05, "loss": 0.4175, "step": 23410 }, { "epoch": 2.345770521360244, "grad_norm": 2.1034457683563232, "learning_rate": 2.7503960839918775e-05, "loss": 0.5427, "step": 23420 }, { "epoch": 2.346772174087244, "grad_norm": 1.7023869752883911, "learning_rate": 2.7488274899728728e-05, "loss": 0.4666, "step": 23430 }, { "epoch": 2.3477738268142434, "grad_norm": 1.739460825920105, "learning_rate": 2.7472587970095985e-05, "loss": 0.479, "step": 23440 }, { "epoch": 2.348775479541243, "grad_norm": 1.9922562837600708, "learning_rate": 2.7456900057258344e-05, "loss": 0.4813, "step": 23450 }, { "epoch": 2.3497771322682426, "grad_norm": 2.244210720062256, "learning_rate": 2.7441211167453973e-05, "loss": 0.4797, "step": 23460 }, { "epoch": 2.350778784995242, "grad_norm": 2.837597131729126, "learning_rate": 2.7425521306921427e-05, "loss": 0.4473, "step": 23470 }, { "epoch": 2.3517804377222418, "grad_norm": 2.2177488803863525, "learning_rate": 2.740983048189966e-05, "loss": 0.5151, "step": 23480 }, { "epoch": 2.3527820904492414, "grad_norm": 1.891855239868164, "learning_rate": 2.7394138698628e-05, "loss": 0.4739, "step": 23490 }, { "epoch": 2.353783743176241, "grad_norm": 2.4679808616638184, "learning_rate": 2.7378445963346165e-05, "loss": 0.4497, "step": 23500 }, { "epoch": 2.3547853959032405, "grad_norm": 1.8632603883743286, "learning_rate": 2.7362752282294252e-05, "loss": 0.4815, "step": 23510 }, { "epoch": 2.3557870486302397, "grad_norm": 1.9275087118148804, "learning_rate": 2.7347057661712706e-05, "loss": 0.4674, "step": 23520 }, { "epoch": 2.3567887013572393, "grad_norm": 2.283627986907959, "learning_rate": 2.7331362107842388e-05, "loss": 0.4662, "step": 23530 }, { "epoch": 2.357790354084239, "grad_norm": 2.1892151832580566, "learning_rate": 2.7315665626924515e-05, "loss": 0.3666, "step": 23540 }, { "epoch": 2.3587920068112385, "grad_norm": 1.8962342739105225, "learning_rate": 2.7299968225200638e-05, "loss": 0.4528, "step": 23550 }, { "epoch": 2.359793659538238, "grad_norm": 2.1917147636413574, "learning_rate": 2.7284269908912737e-05, "loss": 0.5034, "step": 23560 }, { "epoch": 2.3607953122652376, "grad_norm": 1.915441632270813, "learning_rate": 2.7268570684303095e-05, "loss": 0.5584, "step": 23570 }, { "epoch": 2.3617969649922372, "grad_norm": 2.1486260890960693, "learning_rate": 2.7252870557614402e-05, "loss": 0.4875, "step": 23580 }, { "epoch": 2.362798617719237, "grad_norm": 2.0003814697265625, "learning_rate": 2.7237169535089686e-05, "loss": 0.4692, "step": 23590 }, { "epoch": 2.3638002704462364, "grad_norm": 2.1308910846710205, "learning_rate": 2.7221467622972326e-05, "loss": 0.4914, "step": 23600 }, { "epoch": 2.364801923173236, "grad_norm": 2.5318915843963623, "learning_rate": 2.7205764827506064e-05, "loss": 0.5162, "step": 23610 }, { "epoch": 2.3658035759002356, "grad_norm": 1.9081600904464722, "learning_rate": 2.7190061154934993e-05, "loss": 0.4971, "step": 23620 }, { "epoch": 2.3668052286272347, "grad_norm": 3.0676238536834717, "learning_rate": 2.717435661150356e-05, "loss": 0.474, "step": 23630 }, { "epoch": 2.3678068813542343, "grad_norm": 2.3040099143981934, "learning_rate": 2.7158651203456542e-05, "loss": 0.4463, "step": 23640 }, { "epoch": 2.368808534081234, "grad_norm": 2.0134572982788086, "learning_rate": 2.7142944937039072e-05, "loss": 0.4961, "step": 23650 }, { "epoch": 2.3698101868082335, "grad_norm": 2.1416115760803223, "learning_rate": 2.712723781849662e-05, "loss": 0.4916, "step": 23660 }, { "epoch": 2.370811839535233, "grad_norm": 2.0588133335113525, "learning_rate": 2.7111529854075002e-05, "loss": 0.4562, "step": 23670 }, { "epoch": 2.3718134922622327, "grad_norm": 1.623829960823059, "learning_rate": 2.7095821050020358e-05, "loss": 0.4011, "step": 23680 }, { "epoch": 2.3728151449892323, "grad_norm": 2.447624921798706, "learning_rate": 2.708011141257918e-05, "loss": 0.4624, "step": 23690 }, { "epoch": 2.373816797716232, "grad_norm": 3.0367507934570312, "learning_rate": 2.7064400947998263e-05, "loss": 0.4924, "step": 23700 }, { "epoch": 2.3748184504432315, "grad_norm": 3.0571627616882324, "learning_rate": 2.7048689662524767e-05, "loss": 0.4647, "step": 23710 }, { "epoch": 2.375820103170231, "grad_norm": 2.4752557277679443, "learning_rate": 2.7032977562406147e-05, "loss": 0.5044, "step": 23720 }, { "epoch": 2.3768217558972307, "grad_norm": 2.2401139736175537, "learning_rate": 2.7017264653890202e-05, "loss": 0.5055, "step": 23730 }, { "epoch": 2.37782340862423, "grad_norm": 2.167793035507202, "learning_rate": 2.700155094322504e-05, "loss": 0.4909, "step": 23740 }, { "epoch": 2.3788250613512294, "grad_norm": 2.418596029281616, "learning_rate": 2.6985836436659084e-05, "loss": 0.5, "step": 23750 }, { "epoch": 2.379826714078229, "grad_norm": 2.262390613555908, "learning_rate": 2.6970121140441097e-05, "loss": 0.5482, "step": 23760 }, { "epoch": 2.3808283668052286, "grad_norm": 2.523616075515747, "learning_rate": 2.6954405060820138e-05, "loss": 0.4713, "step": 23770 }, { "epoch": 2.381830019532228, "grad_norm": 1.510984182357788, "learning_rate": 2.6938688204045582e-05, "loss": 0.42, "step": 23780 }, { "epoch": 2.3828316722592278, "grad_norm": 1.5943301916122437, "learning_rate": 2.69229705763671e-05, "loss": 0.4441, "step": 23790 }, { "epoch": 2.3838333249862274, "grad_norm": 2.2615034580230713, "learning_rate": 2.6907252184034697e-05, "loss": 0.4397, "step": 23800 }, { "epoch": 2.384834977713227, "grad_norm": 2.052757740020752, "learning_rate": 2.6891533033298656e-05, "loss": 0.4832, "step": 23810 }, { "epoch": 2.3858366304402265, "grad_norm": 1.8588956594467163, "learning_rate": 2.687581313040958e-05, "loss": 0.4632, "step": 23820 }, { "epoch": 2.3868382831672257, "grad_norm": 2.4952921867370605, "learning_rate": 2.6860092481618355e-05, "loss": 0.4788, "step": 23830 }, { "epoch": 2.3878399358942257, "grad_norm": 2.3003437519073486, "learning_rate": 2.684437109317618e-05, "loss": 0.4824, "step": 23840 }, { "epoch": 2.388841588621225, "grad_norm": 2.297075033187866, "learning_rate": 2.682864897133453e-05, "loss": 0.517, "step": 23850 }, { "epoch": 2.3898432413482245, "grad_norm": 2.4082040786743164, "learning_rate": 2.6812926122345185e-05, "loss": 0.5695, "step": 23860 }, { "epoch": 2.390844894075224, "grad_norm": 2.1202104091644287, "learning_rate": 2.679720255246022e-05, "loss": 0.5065, "step": 23870 }, { "epoch": 2.3918465468022236, "grad_norm": 2.0936365127563477, "learning_rate": 2.6781478267931975e-05, "loss": 0.4594, "step": 23880 }, { "epoch": 2.3928481995292232, "grad_norm": 2.3779218196868896, "learning_rate": 2.67657532750131e-05, "loss": 0.4314, "step": 23890 }, { "epoch": 2.393849852256223, "grad_norm": 1.846227765083313, "learning_rate": 2.6750027579956493e-05, "loss": 0.4417, "step": 23900 }, { "epoch": 2.3948515049832224, "grad_norm": 2.4832935333251953, "learning_rate": 2.6734301189015363e-05, "loss": 0.483, "step": 23910 }, { "epoch": 2.395853157710222, "grad_norm": 2.117427110671997, "learning_rate": 2.6718574108443196e-05, "loss": 0.4926, "step": 23920 }, { "epoch": 2.3968548104372216, "grad_norm": 1.825005292892456, "learning_rate": 2.6702846344493714e-05, "loss": 0.4941, "step": 23930 }, { "epoch": 2.3978564631642207, "grad_norm": 2.119313955307007, "learning_rate": 2.668711790342096e-05, "loss": 0.4495, "step": 23940 }, { "epoch": 2.3988581158912203, "grad_norm": 2.2307143211364746, "learning_rate": 2.667138879147921e-05, "loss": 0.5252, "step": 23950 }, { "epoch": 2.39985976861822, "grad_norm": 2.309399127960205, "learning_rate": 2.665565901492303e-05, "loss": 0.5204, "step": 23960 }, { "epoch": 2.4008614213452195, "grad_norm": 2.689640522003174, "learning_rate": 2.663992858000723e-05, "loss": 0.4835, "step": 23970 }, { "epoch": 2.401863074072219, "grad_norm": 2.8619892597198486, "learning_rate": 2.6624197492986897e-05, "loss": 0.4268, "step": 23980 }, { "epoch": 2.4028647267992187, "grad_norm": 1.9714316129684448, "learning_rate": 2.6608465760117374e-05, "loss": 0.4982, "step": 23990 }, { "epoch": 2.4038663795262183, "grad_norm": 1.962856650352478, "learning_rate": 2.6592733387654255e-05, "loss": 0.4649, "step": 24000 }, { "epoch": 2.404868032253218, "grad_norm": 2.0655150413513184, "learning_rate": 2.65770003818534e-05, "loss": 0.4452, "step": 24010 }, { "epoch": 2.4058696849802175, "grad_norm": 2.481144905090332, "learning_rate": 2.6561266748970902e-05, "loss": 0.4823, "step": 24020 }, { "epoch": 2.406871337707217, "grad_norm": 2.099876642227173, "learning_rate": 2.6545532495263125e-05, "loss": 0.4664, "step": 24030 }, { "epoch": 2.4078729904342167, "grad_norm": 2.909708261489868, "learning_rate": 2.6529797626986662e-05, "loss": 0.5461, "step": 24040 }, { "epoch": 2.408874643161216, "grad_norm": 2.3196239471435547, "learning_rate": 2.651406215039837e-05, "loss": 0.4675, "step": 24050 }, { "epoch": 2.4098762958882154, "grad_norm": 2.3955135345458984, "learning_rate": 2.649832607175532e-05, "loss": 0.493, "step": 24060 }, { "epoch": 2.410877948615215, "grad_norm": 1.9178582429885864, "learning_rate": 2.6482589397314854e-05, "loss": 0.4758, "step": 24070 }, { "epoch": 2.4118796013422146, "grad_norm": 2.7419323921203613, "learning_rate": 2.646685213333452e-05, "loss": 0.4915, "step": 24080 }, { "epoch": 2.412881254069214, "grad_norm": 2.2376959323883057, "learning_rate": 2.645111428607212e-05, "loss": 0.4918, "step": 24090 }, { "epoch": 2.4138829067962138, "grad_norm": 1.6830248832702637, "learning_rate": 2.6435375861785692e-05, "loss": 0.4887, "step": 24100 }, { "epoch": 2.4148845595232133, "grad_norm": 1.9309697151184082, "learning_rate": 2.641963686673349e-05, "loss": 0.498, "step": 24110 }, { "epoch": 2.415886212250213, "grad_norm": 1.619993805885315, "learning_rate": 2.6403897307173996e-05, "loss": 0.3869, "step": 24120 }, { "epoch": 2.4168878649772125, "grad_norm": 2.886514663696289, "learning_rate": 2.638815718936593e-05, "loss": 0.4794, "step": 24130 }, { "epoch": 2.417889517704212, "grad_norm": 2.1846656799316406, "learning_rate": 2.6372416519568216e-05, "loss": 0.4546, "step": 24140 }, { "epoch": 2.4188911704312117, "grad_norm": 2.40488338470459, "learning_rate": 2.6356675304040003e-05, "loss": 0.4387, "step": 24150 }, { "epoch": 2.419892823158211, "grad_norm": 2.2383806705474854, "learning_rate": 2.634093354904067e-05, "loss": 0.49, "step": 24160 }, { "epoch": 2.4208944758852105, "grad_norm": 2.0622711181640625, "learning_rate": 2.6325191260829795e-05, "loss": 0.5343, "step": 24170 }, { "epoch": 2.42189612861221, "grad_norm": 1.8681621551513672, "learning_rate": 2.6309448445667172e-05, "loss": 0.4797, "step": 24180 }, { "epoch": 2.4228977813392096, "grad_norm": 2.4106242656707764, "learning_rate": 2.6293705109812806e-05, "loss": 0.4516, "step": 24190 }, { "epoch": 2.4238994340662092, "grad_norm": 2.238723039627075, "learning_rate": 2.627796125952692e-05, "loss": 0.4675, "step": 24200 }, { "epoch": 2.424901086793209, "grad_norm": 2.0002214908599854, "learning_rate": 2.6262216901069913e-05, "loss": 0.4723, "step": 24210 }, { "epoch": 2.4259027395202084, "grad_norm": 1.454474687576294, "learning_rate": 2.6246472040702413e-05, "loss": 0.4336, "step": 24220 }, { "epoch": 2.426904392247208, "grad_norm": 2.259845495223999, "learning_rate": 2.6230726684685246e-05, "loss": 0.4863, "step": 24230 }, { "epoch": 2.4279060449742076, "grad_norm": 2.448485851287842, "learning_rate": 2.621498083927941e-05, "loss": 0.4837, "step": 24240 }, { "epoch": 2.428907697701207, "grad_norm": 1.6946465969085693, "learning_rate": 2.619923451074613e-05, "loss": 0.4887, "step": 24250 }, { "epoch": 2.4299093504282068, "grad_norm": 2.0357892513275146, "learning_rate": 2.6183487705346794e-05, "loss": 0.5002, "step": 24260 }, { "epoch": 2.430911003155206, "grad_norm": 2.7066149711608887, "learning_rate": 2.6167740429343006e-05, "loss": 0.4957, "step": 24270 }, { "epoch": 2.4319126558822055, "grad_norm": 2.0033223628997803, "learning_rate": 2.6151992688996546e-05, "loss": 0.462, "step": 24280 }, { "epoch": 2.432914308609205, "grad_norm": 2.564603328704834, "learning_rate": 2.6136244490569366e-05, "loss": 0.4292, "step": 24290 }, { "epoch": 2.4339159613362047, "grad_norm": 3.0108890533447266, "learning_rate": 2.6120495840323628e-05, "loss": 0.537, "step": 24300 }, { "epoch": 2.4349176140632043, "grad_norm": 2.422274351119995, "learning_rate": 2.610474674452163e-05, "loss": 0.4835, "step": 24310 }, { "epoch": 2.435919266790204, "grad_norm": 2.4366953372955322, "learning_rate": 2.6088997209425903e-05, "loss": 0.4794, "step": 24320 }, { "epoch": 2.4369209195172035, "grad_norm": 2.602489709854126, "learning_rate": 2.607324724129911e-05, "loss": 0.5207, "step": 24330 }, { "epoch": 2.437922572244203, "grad_norm": 2.0761661529541016, "learning_rate": 2.6057496846404105e-05, "loss": 0.4881, "step": 24340 }, { "epoch": 2.4389242249712026, "grad_norm": 2.906827211380005, "learning_rate": 2.60417460310039e-05, "loss": 0.5105, "step": 24350 }, { "epoch": 2.439925877698202, "grad_norm": 1.975074052810669, "learning_rate": 2.602599480136169e-05, "loss": 0.4768, "step": 24360 }, { "epoch": 2.440927530425202, "grad_norm": 2.0563461780548096, "learning_rate": 2.6010243163740815e-05, "loss": 0.5195, "step": 24370 }, { "epoch": 2.441929183152201, "grad_norm": 1.6481271982192993, "learning_rate": 2.599449112440481e-05, "loss": 0.4575, "step": 24380 }, { "epoch": 2.4429308358792006, "grad_norm": 1.454416275024414, "learning_rate": 2.5978738689617322e-05, "loss": 0.5094, "step": 24390 }, { "epoch": 2.4439324886062, "grad_norm": 2.3923304080963135, "learning_rate": 2.5962985865642198e-05, "loss": 0.5516, "step": 24400 }, { "epoch": 2.4449341413331998, "grad_norm": 2.3013486862182617, "learning_rate": 2.5947232658743414e-05, "loss": 0.4856, "step": 24410 }, { "epoch": 2.4459357940601993, "grad_norm": 2.057035207748413, "learning_rate": 2.593147907518511e-05, "loss": 0.4566, "step": 24420 }, { "epoch": 2.446937446787199, "grad_norm": 1.8558269739151, "learning_rate": 2.5915725121231582e-05, "loss": 0.4738, "step": 24430 }, { "epoch": 2.4479390995141985, "grad_norm": 1.7031394243240356, "learning_rate": 2.5899970803147246e-05, "loss": 0.4891, "step": 24440 }, { "epoch": 2.448940752241198, "grad_norm": 1.8119549751281738, "learning_rate": 2.5884216127196694e-05, "loss": 0.4586, "step": 24450 }, { "epoch": 2.4499424049681977, "grad_norm": 2.4061672687530518, "learning_rate": 2.5868461099644642e-05, "loss": 0.5631, "step": 24460 }, { "epoch": 2.450944057695197, "grad_norm": 1.9223402738571167, "learning_rate": 2.5852705726755954e-05, "loss": 0.5479, "step": 24470 }, { "epoch": 2.4519457104221964, "grad_norm": 2.5272395610809326, "learning_rate": 2.583695001479563e-05, "loss": 0.5184, "step": 24480 }, { "epoch": 2.452947363149196, "grad_norm": 2.009592056274414, "learning_rate": 2.5821193970028806e-05, "loss": 0.4291, "step": 24490 }, { "epoch": 2.4539490158761956, "grad_norm": 2.079152822494507, "learning_rate": 2.580543759872074e-05, "loss": 0.4355, "step": 24500 }, { "epoch": 2.454950668603195, "grad_norm": 1.9347796440124512, "learning_rate": 2.5789680907136826e-05, "loss": 0.5151, "step": 24510 }, { "epoch": 2.455952321330195, "grad_norm": 3.365255117416382, "learning_rate": 2.5773923901542597e-05, "loss": 0.4749, "step": 24520 }, { "epoch": 2.4569539740571944, "grad_norm": 2.0016565322875977, "learning_rate": 2.575816658820369e-05, "loss": 0.4134, "step": 24530 }, { "epoch": 2.457955626784194, "grad_norm": 2.206012725830078, "learning_rate": 2.5742408973385883e-05, "loss": 0.4936, "step": 24540 }, { "epoch": 2.4589572795111936, "grad_norm": 1.8435074090957642, "learning_rate": 2.5726651063355057e-05, "loss": 0.5471, "step": 24550 }, { "epoch": 2.459958932238193, "grad_norm": 3.3343753814697266, "learning_rate": 2.5710892864377234e-05, "loss": 0.5009, "step": 24560 }, { "epoch": 2.4609605849651928, "grad_norm": 2.1347665786743164, "learning_rate": 2.5695134382718522e-05, "loss": 0.4714, "step": 24570 }, { "epoch": 2.461962237692192, "grad_norm": 2.361144542694092, "learning_rate": 2.567937562464517e-05, "loss": 0.531, "step": 24580 }, { "epoch": 2.4629638904191915, "grad_norm": 1.9751992225646973, "learning_rate": 2.5663616596423508e-05, "loss": 0.4586, "step": 24590 }, { "epoch": 2.463965543146191, "grad_norm": 2.1158523559570312, "learning_rate": 2.564785730431999e-05, "loss": 0.598, "step": 24600 }, { "epoch": 2.4649671958731907, "grad_norm": 2.8670387268066406, "learning_rate": 2.5632097754601185e-05, "loss": 0.5161, "step": 24610 }, { "epoch": 2.4659688486001903, "grad_norm": 2.2353668212890625, "learning_rate": 2.5616337953533736e-05, "loss": 0.5001, "step": 24620 }, { "epoch": 2.46697050132719, "grad_norm": 2.321228504180908, "learning_rate": 2.560057790738442e-05, "loss": 0.5423, "step": 24630 }, { "epoch": 2.4679721540541895, "grad_norm": 2.136357307434082, "learning_rate": 2.5584817622420077e-05, "loss": 0.4286, "step": 24640 }, { "epoch": 2.468973806781189, "grad_norm": 1.9053417444229126, "learning_rate": 2.5569057104907668e-05, "loss": 0.4948, "step": 24650 }, { "epoch": 2.4699754595081886, "grad_norm": 1.6119698286056519, "learning_rate": 2.555329636111424e-05, "loss": 0.5038, "step": 24660 }, { "epoch": 2.4709771122351882, "grad_norm": 2.262643814086914, "learning_rate": 2.5537535397306916e-05, "loss": 0.5284, "step": 24670 }, { "epoch": 2.471978764962188, "grad_norm": 2.5736396312713623, "learning_rate": 2.5521774219752926e-05, "loss": 0.4789, "step": 24680 }, { "epoch": 2.472980417689187, "grad_norm": 1.8912774324417114, "learning_rate": 2.5506012834719577e-05, "loss": 0.5158, "step": 24690 }, { "epoch": 2.4739820704161866, "grad_norm": 2.2454535961151123, "learning_rate": 2.5490251248474257e-05, "loss": 0.4481, "step": 24700 }, { "epoch": 2.474983723143186, "grad_norm": 1.9818633794784546, "learning_rate": 2.547448946728443e-05, "loss": 0.4488, "step": 24710 }, { "epoch": 2.4759853758701857, "grad_norm": 1.9652526378631592, "learning_rate": 2.545872749741765e-05, "loss": 0.472, "step": 24720 }, { "epoch": 2.4769870285971853, "grad_norm": 2.1900923252105713, "learning_rate": 2.5442965345141533e-05, "loss": 0.426, "step": 24730 }, { "epoch": 2.477988681324185, "grad_norm": 1.747668743133545, "learning_rate": 2.5427203016723782e-05, "loss": 0.4571, "step": 24740 }, { "epoch": 2.4789903340511845, "grad_norm": 2.722621202468872, "learning_rate": 2.5411440518432145e-05, "loss": 0.533, "step": 24750 }, { "epoch": 2.479991986778184, "grad_norm": 1.841064453125, "learning_rate": 2.5395677856534477e-05, "loss": 0.3963, "step": 24760 }, { "epoch": 2.4809936395051837, "grad_norm": 1.9657102823257446, "learning_rate": 2.537991503729865e-05, "loss": 0.4979, "step": 24770 }, { "epoch": 2.4819952922321833, "grad_norm": 2.6763949394226074, "learning_rate": 2.536415206699264e-05, "loss": 0.5105, "step": 24780 }, { "epoch": 2.482996944959183, "grad_norm": 2.6003458499908447, "learning_rate": 2.5348388951884472e-05, "loss": 0.4752, "step": 24790 }, { "epoch": 2.483998597686182, "grad_norm": 3.0447092056274414, "learning_rate": 2.5332625698242195e-05, "loss": 0.5319, "step": 24800 }, { "epoch": 2.4850002504131816, "grad_norm": 1.5755362510681152, "learning_rate": 2.5316862312333978e-05, "loss": 0.4119, "step": 24810 }, { "epoch": 2.486001903140181, "grad_norm": 1.8942782878875732, "learning_rate": 2.530109880042797e-05, "loss": 0.5605, "step": 24820 }, { "epoch": 2.487003555867181, "grad_norm": 2.6547932624816895, "learning_rate": 2.5285335168792435e-05, "loss": 0.4788, "step": 24830 }, { "epoch": 2.4880052085941804, "grad_norm": 2.3786137104034424, "learning_rate": 2.5269571423695638e-05, "loss": 0.4729, "step": 24840 }, { "epoch": 2.48900686132118, "grad_norm": 2.6195545196533203, "learning_rate": 2.5253807571405913e-05, "loss": 0.5661, "step": 24850 }, { "epoch": 2.4900085140481796, "grad_norm": 2.390591621398926, "learning_rate": 2.523804361819163e-05, "loss": 0.4755, "step": 24860 }, { "epoch": 2.491010166775179, "grad_norm": 2.008957624435425, "learning_rate": 2.52222795703212e-05, "loss": 0.5102, "step": 24870 }, { "epoch": 2.4920118195021788, "grad_norm": 1.8178975582122803, "learning_rate": 2.520651543406307e-05, "loss": 0.5363, "step": 24880 }, { "epoch": 2.493013472229178, "grad_norm": 1.8573118448257446, "learning_rate": 2.5190751215685727e-05, "loss": 0.4536, "step": 24890 }, { "epoch": 2.494015124956178, "grad_norm": 1.995599627494812, "learning_rate": 2.5174986921457677e-05, "loss": 0.5196, "step": 24900 }, { "epoch": 2.495016777683177, "grad_norm": 2.5662119388580322, "learning_rate": 2.5159222557647473e-05, "loss": 0.4686, "step": 24910 }, { "epoch": 2.4960184304101767, "grad_norm": 2.2614381313323975, "learning_rate": 2.514345813052369e-05, "loss": 0.4819, "step": 24920 }, { "epoch": 2.4970200831371763, "grad_norm": 2.1325461864471436, "learning_rate": 2.5127693646354915e-05, "loss": 0.414, "step": 24930 }, { "epoch": 2.498021735864176, "grad_norm": 2.0967817306518555, "learning_rate": 2.5111929111409795e-05, "loss": 0.4791, "step": 24940 }, { "epoch": 2.4990233885911755, "grad_norm": 2.1351726055145264, "learning_rate": 2.509616453195694e-05, "loss": 0.4869, "step": 24950 }, { "epoch": 2.500025041318175, "grad_norm": 2.286760091781616, "learning_rate": 2.5080399914265034e-05, "loss": 0.5563, "step": 24960 }, { "epoch": 2.5010266940451746, "grad_norm": 2.3411989212036133, "learning_rate": 2.506463526460274e-05, "loss": 0.5398, "step": 24970 }, { "epoch": 2.5020283467721742, "grad_norm": 2.220675230026245, "learning_rate": 2.504887058923875e-05, "loss": 0.5249, "step": 24980 }, { "epoch": 2.503029999499174, "grad_norm": 2.0709569454193115, "learning_rate": 2.503310589444176e-05, "loss": 0.5202, "step": 24990 }, { "epoch": 2.504031652226173, "grad_norm": 2.3817501068115234, "learning_rate": 2.501734118648047e-05, "loss": 0.4466, "step": 25000 }, { "epoch": 2.505033304953173, "grad_norm": 2.626307487487793, "learning_rate": 2.50015764716236e-05, "loss": 0.4598, "step": 25010 }, { "epoch": 2.506034957680172, "grad_norm": 2.288289785385132, "learning_rate": 2.498581175613986e-05, "loss": 0.4345, "step": 25020 }, { "epoch": 2.5070366104071717, "grad_norm": 2.8735311031341553, "learning_rate": 2.497004704629796e-05, "loss": 0.4907, "step": 25030 }, { "epoch": 2.5080382631341713, "grad_norm": 2.262436866760254, "learning_rate": 2.4954282348366613e-05, "loss": 0.5129, "step": 25040 }, { "epoch": 2.509039915861171, "grad_norm": 2.4624216556549072, "learning_rate": 2.493851766861452e-05, "loss": 0.43, "step": 25050 }, { "epoch": 2.5100415685881705, "grad_norm": 2.532715082168579, "learning_rate": 2.4922753013310397e-05, "loss": 0.4568, "step": 25060 }, { "epoch": 2.51104322131517, "grad_norm": 2.2034213542938232, "learning_rate": 2.490698838872292e-05, "loss": 0.4944, "step": 25070 }, { "epoch": 2.5120448740421697, "grad_norm": 1.553830623626709, "learning_rate": 2.489122380112076e-05, "loss": 0.4666, "step": 25080 }, { "epoch": 2.5130465267691693, "grad_norm": 2.5888707637786865, "learning_rate": 2.48754592567726e-05, "loss": 0.4994, "step": 25090 }, { "epoch": 2.514048179496169, "grad_norm": 2.0383970737457275, "learning_rate": 2.4859694761947074e-05, "loss": 0.4831, "step": 25100 }, { "epoch": 2.515049832223168, "grad_norm": 2.4339680671691895, "learning_rate": 2.4843930322912795e-05, "loss": 0.4834, "step": 25110 }, { "epoch": 2.5160514849501676, "grad_norm": 2.0037882328033447, "learning_rate": 2.482816594593839e-05, "loss": 0.5053, "step": 25120 }, { "epoch": 2.517053137677167, "grad_norm": 2.148824691772461, "learning_rate": 2.481240163729243e-05, "loss": 0.4823, "step": 25130 }, { "epoch": 2.518054790404167, "grad_norm": 2.539546251296997, "learning_rate": 2.4796637403243462e-05, "loss": 0.5396, "step": 25140 }, { "epoch": 2.5190564431311664, "grad_norm": 2.2066774368286133, "learning_rate": 2.478087325006e-05, "loss": 0.4696, "step": 25150 }, { "epoch": 2.520058095858166, "grad_norm": 2.336604595184326, "learning_rate": 2.476510918401056e-05, "loss": 0.4451, "step": 25160 }, { "epoch": 2.5210597485851656, "grad_norm": 2.245626211166382, "learning_rate": 2.4749345211363574e-05, "loss": 0.4704, "step": 25170 }, { "epoch": 2.522061401312165, "grad_norm": 2.162360429763794, "learning_rate": 2.473358133838746e-05, "loss": 0.4521, "step": 25180 }, { "epoch": 2.5230630540391648, "grad_norm": 1.6677041053771973, "learning_rate": 2.4717817571350617e-05, "loss": 0.4513, "step": 25190 }, { "epoch": 2.524064706766164, "grad_norm": 1.8102422952651978, "learning_rate": 2.470205391652137e-05, "loss": 0.4525, "step": 25200 }, { "epoch": 2.525066359493164, "grad_norm": 3.0435376167297363, "learning_rate": 2.468629038016799e-05, "loss": 0.5204, "step": 25210 }, { "epoch": 2.526068012220163, "grad_norm": 2.122325897216797, "learning_rate": 2.4670526968558755e-05, "loss": 0.4684, "step": 25220 }, { "epoch": 2.5270696649471627, "grad_norm": 2.4607677459716797, "learning_rate": 2.465476368796184e-05, "loss": 0.554, "step": 25230 }, { "epoch": 2.5280713176741623, "grad_norm": 1.8819490671157837, "learning_rate": 2.4639000544645403e-05, "loss": 0.4483, "step": 25240 }, { "epoch": 2.529072970401162, "grad_norm": 2.0729873180389404, "learning_rate": 2.4623237544877514e-05, "loss": 0.4738, "step": 25250 }, { "epoch": 2.5300746231281614, "grad_norm": 1.9538005590438843, "learning_rate": 2.4607474694926213e-05, "loss": 0.4464, "step": 25260 }, { "epoch": 2.531076275855161, "grad_norm": 2.4094536304473877, "learning_rate": 2.459171200105948e-05, "loss": 0.4938, "step": 25270 }, { "epoch": 2.5320779285821606, "grad_norm": 2.4971518516540527, "learning_rate": 2.4575949469545213e-05, "loss": 0.5492, "step": 25280 }, { "epoch": 2.53307958130916, "grad_norm": 2.3595423698425293, "learning_rate": 2.4560187106651257e-05, "loss": 0.4369, "step": 25290 }, { "epoch": 2.53408123403616, "grad_norm": 1.8320460319519043, "learning_rate": 2.4544424918645396e-05, "loss": 0.4955, "step": 25300 }, { "epoch": 2.535082886763159, "grad_norm": 2.384916305541992, "learning_rate": 2.4528662911795342e-05, "loss": 0.4669, "step": 25310 }, { "epoch": 2.536084539490159, "grad_norm": 2.4493815898895264, "learning_rate": 2.451290109236872e-05, "loss": 0.4963, "step": 25320 }, { "epoch": 2.537086192217158, "grad_norm": 1.902748942375183, "learning_rate": 2.449713946663309e-05, "loss": 0.4792, "step": 25330 }, { "epoch": 2.5380878449441577, "grad_norm": 2.6030337810516357, "learning_rate": 2.4481378040855955e-05, "loss": 0.4986, "step": 25340 }, { "epoch": 2.5390894976711573, "grad_norm": 3.346127510070801, "learning_rate": 2.446561682130471e-05, "loss": 0.4734, "step": 25350 }, { "epoch": 2.540091150398157, "grad_norm": 2.2294840812683105, "learning_rate": 2.4449855814246668e-05, "loss": 0.443, "step": 25360 }, { "epoch": 2.5410928031251565, "grad_norm": 1.9707425832748413, "learning_rate": 2.4434095025949085e-05, "loss": 0.5229, "step": 25370 }, { "epoch": 2.542094455852156, "grad_norm": 2.2785727977752686, "learning_rate": 2.4418334462679112e-05, "loss": 0.4674, "step": 25380 }, { "epoch": 2.5430961085791557, "grad_norm": 2.2725393772125244, "learning_rate": 2.4402574130703795e-05, "loss": 0.4049, "step": 25390 }, { "epoch": 2.5440977613061553, "grad_norm": 1.964576244354248, "learning_rate": 2.438681403629012e-05, "loss": 0.5268, "step": 25400 }, { "epoch": 2.545099414033155, "grad_norm": 2.273085594177246, "learning_rate": 2.437105418570495e-05, "loss": 0.4817, "step": 25410 }, { "epoch": 2.546101066760154, "grad_norm": 2.092212438583374, "learning_rate": 2.4355294585215084e-05, "loss": 0.4439, "step": 25420 }, { "epoch": 2.547102719487154, "grad_norm": 2.514829635620117, "learning_rate": 2.433953524108718e-05, "loss": 0.492, "step": 25430 }, { "epoch": 2.548104372214153, "grad_norm": 2.0971784591674805, "learning_rate": 2.4323776159587828e-05, "loss": 0.4162, "step": 25440 }, { "epoch": 2.549106024941153, "grad_norm": 2.2613589763641357, "learning_rate": 2.4308017346983508e-05, "loss": 0.5042, "step": 25450 }, { "epoch": 2.5501076776681524, "grad_norm": 2.3998570442199707, "learning_rate": 2.429225880954056e-05, "loss": 0.4472, "step": 25460 }, { "epoch": 2.551109330395152, "grad_norm": 2.4489238262176514, "learning_rate": 2.4276500553525267e-05, "loss": 0.4959, "step": 25470 }, { "epoch": 2.5521109831221516, "grad_norm": 2.075310468673706, "learning_rate": 2.4260742585203755e-05, "loss": 0.5041, "step": 25480 }, { "epoch": 2.553112635849151, "grad_norm": 2.417213201522827, "learning_rate": 2.4244984910842076e-05, "loss": 0.4575, "step": 25490 }, { "epoch": 2.5541142885761507, "grad_norm": 2.008910655975342, "learning_rate": 2.4229227536706126e-05, "loss": 0.4455, "step": 25500 }, { "epoch": 2.5551159413031503, "grad_norm": 2.7786033153533936, "learning_rate": 2.42134704690617e-05, "loss": 0.4653, "step": 25510 }, { "epoch": 2.55611759403015, "grad_norm": 1.7164369821548462, "learning_rate": 2.4197713714174477e-05, "loss": 0.4826, "step": 25520 }, { "epoch": 2.557119246757149, "grad_norm": 2.7447457313537598, "learning_rate": 2.418195727831001e-05, "loss": 0.4789, "step": 25530 }, { "epoch": 2.558120899484149, "grad_norm": 1.7751262187957764, "learning_rate": 2.4166201167733705e-05, "loss": 0.5007, "step": 25540 }, { "epoch": 2.5591225522111483, "grad_norm": 2.5949177742004395, "learning_rate": 2.415044538871086e-05, "loss": 0.475, "step": 25550 }, { "epoch": 2.560124204938148, "grad_norm": 2.2382097244262695, "learning_rate": 2.413468994750665e-05, "loss": 0.4627, "step": 25560 }, { "epoch": 2.5611258576651474, "grad_norm": 3.2741479873657227, "learning_rate": 2.4118934850386076e-05, "loss": 0.4902, "step": 25570 }, { "epoch": 2.562127510392147, "grad_norm": 1.8402247428894043, "learning_rate": 2.4103180103614042e-05, "loss": 0.5062, "step": 25580 }, { "epoch": 2.5631291631191466, "grad_norm": 3.088942766189575, "learning_rate": 2.408742571345529e-05, "loss": 0.452, "step": 25590 }, { "epoch": 2.564130815846146, "grad_norm": 2.6764485836029053, "learning_rate": 2.4071671686174444e-05, "loss": 0.4863, "step": 25600 }, { "epoch": 2.565132468573146, "grad_norm": 1.7545393705368042, "learning_rate": 2.4055918028035944e-05, "loss": 0.5092, "step": 25610 }, { "epoch": 2.5661341213001454, "grad_norm": 2.4561171531677246, "learning_rate": 2.404016474530412e-05, "loss": 0.4584, "step": 25620 }, { "epoch": 2.567135774027145, "grad_norm": 2.4661591053009033, "learning_rate": 2.4024411844243136e-05, "loss": 0.47, "step": 25630 }, { "epoch": 2.568137426754144, "grad_norm": 2.1460041999816895, "learning_rate": 2.4008659331117018e-05, "loss": 0.5056, "step": 25640 }, { "epoch": 2.5691390794811437, "grad_norm": 2.1262660026550293, "learning_rate": 2.3992907212189615e-05, "loss": 0.511, "step": 25650 }, { "epoch": 2.5701407322081433, "grad_norm": 2.1861934661865234, "learning_rate": 2.3977155493724624e-05, "loss": 0.4645, "step": 25660 }, { "epoch": 2.571142384935143, "grad_norm": 4.291914939880371, "learning_rate": 2.3961404181985613e-05, "loss": 0.5166, "step": 25670 }, { "epoch": 2.5721440376621425, "grad_norm": 2.78710675239563, "learning_rate": 2.394565328323595e-05, "loss": 0.5688, "step": 25680 }, { "epoch": 2.573145690389142, "grad_norm": 2.88307523727417, "learning_rate": 2.3929902803738852e-05, "loss": 0.4476, "step": 25690 }, { "epoch": 2.5741473431161417, "grad_norm": 2.102853298187256, "learning_rate": 2.391415274975738e-05, "loss": 0.522, "step": 25700 }, { "epoch": 2.5751489958431413, "grad_norm": 2.1062817573547363, "learning_rate": 2.3898403127554423e-05, "loss": 0.5218, "step": 25710 }, { "epoch": 2.576150648570141, "grad_norm": 2.3642542362213135, "learning_rate": 2.3882653943392678e-05, "loss": 0.4886, "step": 25720 }, { "epoch": 2.57715230129714, "grad_norm": 1.5191824436187744, "learning_rate": 2.3866905203534693e-05, "loss": 0.4415, "step": 25730 }, { "epoch": 2.57815395402414, "grad_norm": 2.072801113128662, "learning_rate": 2.385115691424283e-05, "loss": 0.4764, "step": 25740 }, { "epoch": 2.579155606751139, "grad_norm": 2.228182554244995, "learning_rate": 2.3835409081779252e-05, "loss": 0.5007, "step": 25750 }, { "epoch": 2.580157259478139, "grad_norm": 1.7731989622116089, "learning_rate": 2.3819661712405983e-05, "loss": 0.4685, "step": 25760 }, { "epoch": 2.5811589122051384, "grad_norm": 1.9319185018539429, "learning_rate": 2.3803914812384823e-05, "loss": 0.4991, "step": 25770 }, { "epoch": 2.582160564932138, "grad_norm": 2.0118401050567627, "learning_rate": 2.3788168387977424e-05, "loss": 0.5158, "step": 25780 }, { "epoch": 2.5831622176591376, "grad_norm": 1.7415335178375244, "learning_rate": 2.377242244544519e-05, "loss": 0.4717, "step": 25790 }, { "epoch": 2.584163870386137, "grad_norm": 2.1140804290771484, "learning_rate": 2.3756676991049402e-05, "loss": 0.4628, "step": 25800 }, { "epoch": 2.5851655231131367, "grad_norm": 1.9852862358093262, "learning_rate": 2.3740932031051092e-05, "loss": 0.5444, "step": 25810 }, { "epoch": 2.5861671758401363, "grad_norm": 1.569001317024231, "learning_rate": 2.3725187571711138e-05, "loss": 0.478, "step": 25820 }, { "epoch": 2.587168828567136, "grad_norm": 1.7986372709274292, "learning_rate": 2.3709443619290184e-05, "loss": 0.4702, "step": 25830 }, { "epoch": 2.588170481294135, "grad_norm": 2.3105223178863525, "learning_rate": 2.3693700180048688e-05, "loss": 0.4755, "step": 25840 }, { "epoch": 2.589172134021135, "grad_norm": 1.8925827741622925, "learning_rate": 2.3677957260246917e-05, "loss": 0.4492, "step": 25850 }, { "epoch": 2.5901737867481343, "grad_norm": 2.353822946548462, "learning_rate": 2.36622148661449e-05, "loss": 0.5658, "step": 25860 }, { "epoch": 2.591175439475134, "grad_norm": 2.317485809326172, "learning_rate": 2.364647300400248e-05, "loss": 0.4531, "step": 25870 }, { "epoch": 2.5921770922021334, "grad_norm": 1.9144796133041382, "learning_rate": 2.363073168007929e-05, "loss": 0.4664, "step": 25880 }, { "epoch": 2.593178744929133, "grad_norm": 2.278697967529297, "learning_rate": 2.361499090063474e-05, "loss": 0.4907, "step": 25890 }, { "epoch": 2.5941803976561326, "grad_norm": 2.4261505603790283, "learning_rate": 2.3599250671928014e-05, "loss": 0.4243, "step": 25900 }, { "epoch": 2.595182050383132, "grad_norm": 2.083456039428711, "learning_rate": 2.3583511000218103e-05, "loss": 0.4674, "step": 25910 }, { "epoch": 2.596183703110132, "grad_norm": 2.447664737701416, "learning_rate": 2.3567771891763763e-05, "loss": 0.493, "step": 25920 }, { "epoch": 2.5971853558371314, "grad_norm": 2.034817934036255, "learning_rate": 2.3552033352823505e-05, "loss": 0.4827, "step": 25930 }, { "epoch": 2.598187008564131, "grad_norm": 2.444237470626831, "learning_rate": 2.3536295389655654e-05, "loss": 0.4507, "step": 25940 }, { "epoch": 2.59918866129113, "grad_norm": 2.6944849491119385, "learning_rate": 2.3520558008518273e-05, "loss": 0.5128, "step": 25950 }, { "epoch": 2.60019031401813, "grad_norm": 1.7680457830429077, "learning_rate": 2.3504821215669228e-05, "loss": 0.4819, "step": 25960 }, { "epoch": 2.6011919667451293, "grad_norm": 2.6250157356262207, "learning_rate": 2.3489085017366097e-05, "loss": 0.5471, "step": 25970 }, { "epoch": 2.602193619472129, "grad_norm": 1.8260471820831299, "learning_rate": 2.3473349419866275e-05, "loss": 0.4382, "step": 25980 }, { "epoch": 2.6031952721991285, "grad_norm": 2.2221004962921143, "learning_rate": 2.345761442942689e-05, "loss": 0.4822, "step": 25990 }, { "epoch": 2.604196924926128, "grad_norm": 1.932762622833252, "learning_rate": 2.3441880052304842e-05, "loss": 0.5148, "step": 26000 }, { "epoch": 2.6051985776531277, "grad_norm": 2.2098388671875, "learning_rate": 2.3426146294756774e-05, "loss": 0.5548, "step": 26010 }, { "epoch": 2.6062002303801273, "grad_norm": 2.6943869590759277, "learning_rate": 2.3410413163039088e-05, "loss": 0.4284, "step": 26020 }, { "epoch": 2.607201883107127, "grad_norm": 1.8194222450256348, "learning_rate": 2.3394680663407954e-05, "loss": 0.4428, "step": 26030 }, { "epoch": 2.6082035358341265, "grad_norm": 2.8261239528656006, "learning_rate": 2.3378948802119254e-05, "loss": 0.5257, "step": 26040 }, { "epoch": 2.609205188561126, "grad_norm": 2.3305418491363525, "learning_rate": 2.3363217585428644e-05, "loss": 0.3921, "step": 26050 }, { "epoch": 2.610206841288125, "grad_norm": 1.934168815612793, "learning_rate": 2.3347487019591524e-05, "loss": 0.4497, "step": 26060 }, { "epoch": 2.6112084940151252, "grad_norm": 2.6037657260894775, "learning_rate": 2.3331757110863028e-05, "loss": 0.5347, "step": 26070 }, { "epoch": 2.6122101467421244, "grad_norm": 2.024768829345703, "learning_rate": 2.3316027865498017e-05, "loss": 0.4866, "step": 26080 }, { "epoch": 2.613211799469124, "grad_norm": 2.0269052982330322, "learning_rate": 2.330029928975111e-05, "loss": 0.4654, "step": 26090 }, { "epoch": 2.6142134521961236, "grad_norm": 2.378782033920288, "learning_rate": 2.3284571389876643e-05, "loss": 0.5363, "step": 26100 }, { "epoch": 2.615215104923123, "grad_norm": 2.4418351650238037, "learning_rate": 2.3268844172128703e-05, "loss": 0.4381, "step": 26110 }, { "epoch": 2.6162167576501227, "grad_norm": 2.0797080993652344, "learning_rate": 2.325311764276108e-05, "loss": 0.4586, "step": 26120 }, { "epoch": 2.6172184103771223, "grad_norm": 2.9233202934265137, "learning_rate": 2.323739180802731e-05, "loss": 0.5051, "step": 26130 }, { "epoch": 2.618220063104122, "grad_norm": 1.9914222955703735, "learning_rate": 2.3221666674180647e-05, "loss": 0.4812, "step": 26140 }, { "epoch": 2.6192217158311215, "grad_norm": 1.946094036102295, "learning_rate": 2.320594224747405e-05, "loss": 0.5021, "step": 26150 }, { "epoch": 2.620223368558121, "grad_norm": 2.0537545680999756, "learning_rate": 2.3190218534160233e-05, "loss": 0.4171, "step": 26160 }, { "epoch": 2.6212250212851202, "grad_norm": 2.107255697250366, "learning_rate": 2.3174495540491588e-05, "loss": 0.4683, "step": 26170 }, { "epoch": 2.62222667401212, "grad_norm": 1.7646708488464355, "learning_rate": 2.3158773272720254e-05, "loss": 0.4256, "step": 26180 }, { "epoch": 2.6232283267391194, "grad_norm": 1.94224214553833, "learning_rate": 2.3143051737098054e-05, "loss": 0.4832, "step": 26190 }, { "epoch": 2.624229979466119, "grad_norm": 2.5952892303466797, "learning_rate": 2.312733093987653e-05, "loss": 0.4565, "step": 26200 }, { "epoch": 2.6252316321931186, "grad_norm": 1.9447942972183228, "learning_rate": 2.3111610887306946e-05, "loss": 0.4124, "step": 26210 }, { "epoch": 2.626233284920118, "grad_norm": 2.0823614597320557, "learning_rate": 2.3095891585640246e-05, "loss": 0.5009, "step": 26220 }, { "epoch": 2.627234937647118, "grad_norm": 2.4931070804595947, "learning_rate": 2.3080173041127074e-05, "loss": 0.4633, "step": 26230 }, { "epoch": 2.6282365903741174, "grad_norm": 2.088278293609619, "learning_rate": 2.3064455260017803e-05, "loss": 0.4155, "step": 26240 }, { "epoch": 2.629238243101117, "grad_norm": 2.1908822059631348, "learning_rate": 2.3048738248562478e-05, "loss": 0.3935, "step": 26250 }, { "epoch": 2.630239895828116, "grad_norm": 2.7435402870178223, "learning_rate": 2.3033022013010834e-05, "loss": 0.4813, "step": 26260 }, { "epoch": 2.631241548555116, "grad_norm": 2.0322299003601074, "learning_rate": 2.301730655961232e-05, "loss": 0.4314, "step": 26270 }, { "epoch": 2.6322432012821153, "grad_norm": 2.4347589015960693, "learning_rate": 2.300159189461605e-05, "loss": 0.4644, "step": 26280 }, { "epoch": 2.633244854009115, "grad_norm": 2.0065510272979736, "learning_rate": 2.298587802427085e-05, "loss": 0.4897, "step": 26290 }, { "epoch": 2.6342465067361145, "grad_norm": 2.5837438106536865, "learning_rate": 2.2970164954825192e-05, "loss": 0.469, "step": 26300 }, { "epoch": 2.635248159463114, "grad_norm": 2.4908740520477295, "learning_rate": 2.2954452692527277e-05, "loss": 0.4813, "step": 26310 }, { "epoch": 2.6362498121901137, "grad_norm": 2.1361074447631836, "learning_rate": 2.293874124362495e-05, "loss": 0.4876, "step": 26320 }, { "epoch": 2.6372514649171133, "grad_norm": 2.353395462036133, "learning_rate": 2.2923030614365735e-05, "loss": 0.4718, "step": 26330 }, { "epoch": 2.638253117644113, "grad_norm": 1.8883154392242432, "learning_rate": 2.290732081099685e-05, "loss": 0.5121, "step": 26340 }, { "epoch": 2.6392547703711124, "grad_norm": 2.065039873123169, "learning_rate": 2.289161183976517e-05, "loss": 0.488, "step": 26350 }, { "epoch": 2.640256423098112, "grad_norm": 2.287727117538452, "learning_rate": 2.287590370691725e-05, "loss": 0.4939, "step": 26360 }, { "epoch": 2.641258075825111, "grad_norm": 2.5591814517974854, "learning_rate": 2.2860196418699293e-05, "loss": 0.4275, "step": 26370 }, { "epoch": 2.642259728552111, "grad_norm": 2.074220657348633, "learning_rate": 2.284448998135717e-05, "loss": 0.4809, "step": 26380 }, { "epoch": 2.6432613812791104, "grad_norm": 1.7956701517105103, "learning_rate": 2.282878440113644e-05, "loss": 0.4729, "step": 26390 }, { "epoch": 2.64426303400611, "grad_norm": 2.273275375366211, "learning_rate": 2.28130796842823e-05, "loss": 0.5459, "step": 26400 }, { "epoch": 2.6452646867331095, "grad_norm": 2.206364393234253, "learning_rate": 2.2797375837039586e-05, "loss": 0.5378, "step": 26410 }, { "epoch": 2.646266339460109, "grad_norm": 2.165149688720703, "learning_rate": 2.2781672865652824e-05, "loss": 0.4299, "step": 26420 }, { "epoch": 2.6472679921871087, "grad_norm": 2.0546000003814697, "learning_rate": 2.276597077636618e-05, "loss": 0.4466, "step": 26430 }, { "epoch": 2.6482696449141083, "grad_norm": 1.8035660982131958, "learning_rate": 2.2750269575423447e-05, "loss": 0.4712, "step": 26440 }, { "epoch": 2.649271297641108, "grad_norm": 2.0470290184020996, "learning_rate": 2.2734569269068092e-05, "loss": 0.4758, "step": 26450 }, { "epoch": 2.6502729503681075, "grad_norm": 2.314500570297241, "learning_rate": 2.271886986354322e-05, "loss": 0.5292, "step": 26460 }, { "epoch": 2.651274603095107, "grad_norm": 1.8078278303146362, "learning_rate": 2.2703171365091577e-05, "loss": 0.4468, "step": 26470 }, { "epoch": 2.6522762558221062, "grad_norm": 2.535794734954834, "learning_rate": 2.2687473779955526e-05, "loss": 0.4847, "step": 26480 }, { "epoch": 2.6532779085491063, "grad_norm": 2.081583023071289, "learning_rate": 2.267177711437711e-05, "loss": 0.4684, "step": 26490 }, { "epoch": 2.6542795612761054, "grad_norm": 2.319704294204712, "learning_rate": 2.2656081374597976e-05, "loss": 0.4822, "step": 26500 }, { "epoch": 2.655281214003105, "grad_norm": 2.5577144622802734, "learning_rate": 2.2640386566859398e-05, "loss": 0.4674, "step": 26510 }, { "epoch": 2.6562828667301046, "grad_norm": 1.8538458347320557, "learning_rate": 2.2624692697402306e-05, "loss": 0.4069, "step": 26520 }, { "epoch": 2.657284519457104, "grad_norm": 2.406271457672119, "learning_rate": 2.2608999772467228e-05, "loss": 0.5484, "step": 26530 }, { "epoch": 2.658286172184104, "grad_norm": 2.2202024459838867, "learning_rate": 2.2593307798294355e-05, "loss": 0.3974, "step": 26540 }, { "epoch": 2.6592878249111034, "grad_norm": 1.886047601699829, "learning_rate": 2.257761678112345e-05, "loss": 0.4842, "step": 26550 }, { "epoch": 2.660289477638103, "grad_norm": 2.4025936126708984, "learning_rate": 2.256192672719393e-05, "loss": 0.466, "step": 26560 }, { "epoch": 2.6612911303651026, "grad_norm": 2.0745341777801514, "learning_rate": 2.254623764274482e-05, "loss": 0.4589, "step": 26570 }, { "epoch": 2.662292783092102, "grad_norm": 1.9398627281188965, "learning_rate": 2.2530549534014772e-05, "loss": 0.507, "step": 26580 }, { "epoch": 2.6632944358191013, "grad_norm": 2.9563961029052734, "learning_rate": 2.2514862407242012e-05, "loss": 0.4202, "step": 26590 }, { "epoch": 2.6642960885461013, "grad_norm": 2.6384031772613525, "learning_rate": 2.249917626866442e-05, "loss": 0.4569, "step": 26600 }, { "epoch": 2.6652977412731005, "grad_norm": 2.049692153930664, "learning_rate": 2.2483491124519465e-05, "loss": 0.4576, "step": 26610 }, { "epoch": 2.6662993940001, "grad_norm": 2.3128302097320557, "learning_rate": 2.2467806981044207e-05, "loss": 0.4881, "step": 26620 }, { "epoch": 2.6673010467270997, "grad_norm": 1.87588369846344, "learning_rate": 2.2452123844475322e-05, "loss": 0.4082, "step": 26630 }, { "epoch": 2.6683026994540993, "grad_norm": 2.448084592819214, "learning_rate": 2.2436441721049095e-05, "loss": 0.4678, "step": 26640 }, { "epoch": 2.669304352181099, "grad_norm": 2.088381290435791, "learning_rate": 2.2420760617001395e-05, "loss": 0.4423, "step": 26650 }, { "epoch": 2.6703060049080984, "grad_norm": 2.1151883602142334, "learning_rate": 2.2405080538567673e-05, "loss": 0.4899, "step": 26660 }, { "epoch": 2.671307657635098, "grad_norm": 2.232419729232788, "learning_rate": 2.238940149198301e-05, "loss": 0.5182, "step": 26670 }, { "epoch": 2.6723093103620976, "grad_norm": 1.9108939170837402, "learning_rate": 2.237372348348203e-05, "loss": 0.4394, "step": 26680 }, { "epoch": 2.673310963089097, "grad_norm": 2.0978970527648926, "learning_rate": 2.2358046519298997e-05, "loss": 0.5118, "step": 26690 }, { "epoch": 2.6743126158160964, "grad_norm": 2.155327558517456, "learning_rate": 2.234237060566771e-05, "loss": 0.5335, "step": 26700 }, { "epoch": 2.675314268543096, "grad_norm": 2.3690075874328613, "learning_rate": 2.2326695748821565e-05, "loss": 0.4261, "step": 26710 }, { "epoch": 2.6763159212700955, "grad_norm": 1.9474866390228271, "learning_rate": 2.2311021954993572e-05, "loss": 0.5525, "step": 26720 }, { "epoch": 2.677317573997095, "grad_norm": 2.135596990585327, "learning_rate": 2.2295349230416266e-05, "loss": 0.5035, "step": 26730 }, { "epoch": 2.6783192267240947, "grad_norm": 2.132472276687622, "learning_rate": 2.227967758132178e-05, "loss": 0.5243, "step": 26740 }, { "epoch": 2.6793208794510943, "grad_norm": 1.8780027627944946, "learning_rate": 2.226400701394184e-05, "loss": 0.4771, "step": 26750 }, { "epoch": 2.680322532178094, "grad_norm": 1.9652684926986694, "learning_rate": 2.224833753450771e-05, "loss": 0.4811, "step": 26760 }, { "epoch": 2.6813241849050935, "grad_norm": 2.099169969558716, "learning_rate": 2.2232669149250227e-05, "loss": 0.4777, "step": 26770 }, { "epoch": 2.682325837632093, "grad_norm": 2.4007065296173096, "learning_rate": 2.221700186439981e-05, "loss": 0.4176, "step": 26780 }, { "epoch": 2.6833274903590922, "grad_norm": 2.2021305561065674, "learning_rate": 2.2201335686186435e-05, "loss": 0.5119, "step": 26790 }, { "epoch": 2.6843291430860923, "grad_norm": 2.159282922744751, "learning_rate": 2.218567062083962e-05, "loss": 0.4317, "step": 26800 }, { "epoch": 2.6853307958130914, "grad_norm": 2.309873342514038, "learning_rate": 2.217000667458845e-05, "loss": 0.5505, "step": 26810 }, { "epoch": 2.686332448540091, "grad_norm": 2.6905899047851562, "learning_rate": 2.2154343853661586e-05, "loss": 0.4556, "step": 26820 }, { "epoch": 2.6873341012670906, "grad_norm": 1.9428925514221191, "learning_rate": 2.2138682164287217e-05, "loss": 0.4622, "step": 26830 }, { "epoch": 2.68833575399409, "grad_norm": 2.6776959896087646, "learning_rate": 2.212302161269308e-05, "loss": 0.5363, "step": 26840 }, { "epoch": 2.68933740672109, "grad_norm": 2.054520606994629, "learning_rate": 2.2107362205106477e-05, "loss": 0.4442, "step": 26850 }, { "epoch": 2.6903390594480894, "grad_norm": 1.8479695320129395, "learning_rate": 2.209170394775424e-05, "loss": 0.4855, "step": 26860 }, { "epoch": 2.691340712175089, "grad_norm": 2.017530679702759, "learning_rate": 2.2076046846862767e-05, "loss": 0.4774, "step": 26870 }, { "epoch": 2.6923423649020886, "grad_norm": 2.2654614448547363, "learning_rate": 2.2060390908657964e-05, "loss": 0.5063, "step": 26880 }, { "epoch": 2.693344017629088, "grad_norm": 2.8104443550109863, "learning_rate": 2.2044736139365286e-05, "loss": 0.4963, "step": 26890 }, { "epoch": 2.6943456703560873, "grad_norm": 2.534173011779785, "learning_rate": 2.2029082545209748e-05, "loss": 0.5132, "step": 26900 }, { "epoch": 2.6953473230830873, "grad_norm": 2.722679376602173, "learning_rate": 2.2013430132415858e-05, "loss": 0.4832, "step": 26910 }, { "epoch": 2.6963489758100865, "grad_norm": 1.6218446493148804, "learning_rate": 2.1997778907207673e-05, "loss": 0.4437, "step": 26920 }, { "epoch": 2.697350628537086, "grad_norm": 2.0946335792541504, "learning_rate": 2.198212887580879e-05, "loss": 0.4733, "step": 26930 }, { "epoch": 2.6983522812640857, "grad_norm": 2.202590227127075, "learning_rate": 2.196648004444232e-05, "loss": 0.4577, "step": 26940 }, { "epoch": 2.6993539339910853, "grad_norm": 2.196267604827881, "learning_rate": 2.195083241933088e-05, "loss": 0.5106, "step": 26950 }, { "epoch": 2.700355586718085, "grad_norm": 2.427388906478882, "learning_rate": 2.1935186006696634e-05, "loss": 0.4277, "step": 26960 }, { "epoch": 2.7013572394450844, "grad_norm": 2.214708089828491, "learning_rate": 2.191954081276126e-05, "loss": 0.4305, "step": 26970 }, { "epoch": 2.702358892172084, "grad_norm": 2.6666109561920166, "learning_rate": 2.1903896843745932e-05, "loss": 0.4622, "step": 26980 }, { "epoch": 2.7033605448990836, "grad_norm": 2.6057868003845215, "learning_rate": 2.188825410587135e-05, "loss": 0.4764, "step": 26990 }, { "epoch": 2.704362197626083, "grad_norm": 2.430042266845703, "learning_rate": 2.1872612605357735e-05, "loss": 0.4984, "step": 27000 }, { "epoch": 2.7053638503530824, "grad_norm": 2.1184325218200684, "learning_rate": 2.1856972348424805e-05, "loss": 0.4949, "step": 27010 }, { "epoch": 2.7063655030800824, "grad_norm": 1.98492431640625, "learning_rate": 2.184133334129176e-05, "loss": 0.5301, "step": 27020 }, { "epoch": 2.7073671558070815, "grad_norm": 1.8999614715576172, "learning_rate": 2.1825695590177355e-05, "loss": 0.4537, "step": 27030 }, { "epoch": 2.708368808534081, "grad_norm": 3.519404649734497, "learning_rate": 2.1810059101299802e-05, "loss": 0.4338, "step": 27040 }, { "epoch": 2.7093704612610807, "grad_norm": 2.756481409072876, "learning_rate": 2.1794423880876842e-05, "loss": 0.472, "step": 27050 }, { "epoch": 2.7103721139880803, "grad_norm": 2.2912352085113525, "learning_rate": 2.1778789935125674e-05, "loss": 0.4935, "step": 27060 }, { "epoch": 2.71137376671508, "grad_norm": 1.8153929710388184, "learning_rate": 2.1763157270263017e-05, "loss": 0.4422, "step": 27070 }, { "epoch": 2.7123754194420795, "grad_norm": 2.11661434173584, "learning_rate": 2.1747525892505094e-05, "loss": 0.484, "step": 27080 }, { "epoch": 2.713377072169079, "grad_norm": 2.561905860900879, "learning_rate": 2.1731895808067583e-05, "loss": 0.4715, "step": 27090 }, { "epoch": 2.7143787248960787, "grad_norm": 2.2325327396392822, "learning_rate": 2.171626702316565e-05, "loss": 0.4948, "step": 27100 }, { "epoch": 2.7153803776230783, "grad_norm": 2.3941023349761963, "learning_rate": 2.170063954401398e-05, "loss": 0.5549, "step": 27110 }, { "epoch": 2.7163820303500774, "grad_norm": 1.9785363674163818, "learning_rate": 2.1685013376826715e-05, "loss": 0.4756, "step": 27120 }, { "epoch": 2.7173836830770774, "grad_norm": 2.551440715789795, "learning_rate": 2.166938852781746e-05, "loss": 0.5079, "step": 27130 }, { "epoch": 2.7183853358040766, "grad_norm": 2.1827147006988525, "learning_rate": 2.1653765003199315e-05, "loss": 0.4197, "step": 27140 }, { "epoch": 2.719386988531076, "grad_norm": 1.9968968629837036, "learning_rate": 2.163814280918486e-05, "loss": 0.4311, "step": 27150 }, { "epoch": 2.720388641258076, "grad_norm": 1.8684368133544922, "learning_rate": 2.162252195198613e-05, "loss": 0.416, "step": 27160 }, { "epoch": 2.7213902939850754, "grad_norm": 2.0815813541412354, "learning_rate": 2.160690243781463e-05, "loss": 0.4611, "step": 27170 }, { "epoch": 2.722391946712075, "grad_norm": 2.146352767944336, "learning_rate": 2.159128427288134e-05, "loss": 0.4603, "step": 27180 }, { "epoch": 2.7233935994390746, "grad_norm": 2.8626322746276855, "learning_rate": 2.157566746339671e-05, "loss": 0.484, "step": 27190 }, { "epoch": 2.724395252166074, "grad_norm": 1.959716796875, "learning_rate": 2.1560052015570613e-05, "loss": 0.4973, "step": 27200 }, { "epoch": 2.7253969048930737, "grad_norm": 2.7703990936279297, "learning_rate": 2.154443793561243e-05, "loss": 0.4741, "step": 27210 }, { "epoch": 2.7263985576200733, "grad_norm": 2.6336097717285156, "learning_rate": 2.1528825229730966e-05, "loss": 0.4971, "step": 27220 }, { "epoch": 2.7274002103470725, "grad_norm": 2.6301822662353516, "learning_rate": 2.1513213904134496e-05, "loss": 0.4954, "step": 27230 }, { "epoch": 2.728401863074072, "grad_norm": 2.201345920562744, "learning_rate": 2.1497603965030736e-05, "loss": 0.4635, "step": 27240 }, { "epoch": 2.7294035158010717, "grad_norm": 2.9680652618408203, "learning_rate": 2.1481995418626842e-05, "loss": 0.4896, "step": 27250 }, { "epoch": 2.7304051685280712, "grad_norm": 1.5185983180999756, "learning_rate": 2.1466388271129458e-05, "loss": 0.4887, "step": 27260 }, { "epoch": 2.731406821255071, "grad_norm": 2.366058111190796, "learning_rate": 2.1450782528744605e-05, "loss": 0.4411, "step": 27270 }, { "epoch": 2.7324084739820704, "grad_norm": 2.3551275730133057, "learning_rate": 2.143517819767781e-05, "loss": 0.3853, "step": 27280 }, { "epoch": 2.73341012670907, "grad_norm": 1.7083995342254639, "learning_rate": 2.1419575284133993e-05, "loss": 0.4732, "step": 27290 }, { "epoch": 2.7344117794360696, "grad_norm": 2.4988021850585938, "learning_rate": 2.1403973794317543e-05, "loss": 0.546, "step": 27300 }, { "epoch": 2.735413432163069, "grad_norm": 2.1923186779022217, "learning_rate": 2.1388373734432256e-05, "loss": 0.4854, "step": 27310 }, { "epoch": 2.7364150848900683, "grad_norm": 2.622400999069214, "learning_rate": 2.1372775110681376e-05, "loss": 0.4919, "step": 27320 }, { "epoch": 2.7374167376170684, "grad_norm": 2.5335519313812256, "learning_rate": 2.1357177929267574e-05, "loss": 0.5083, "step": 27330 }, { "epoch": 2.7384183903440675, "grad_norm": 2.4166882038116455, "learning_rate": 2.134158219639295e-05, "loss": 0.4475, "step": 27340 }, { "epoch": 2.739420043071067, "grad_norm": 2.1072611808776855, "learning_rate": 2.1325987918259006e-05, "loss": 0.498, "step": 27350 }, { "epoch": 2.7404216957980667, "grad_norm": 2.3170204162597656, "learning_rate": 2.1310395101066692e-05, "loss": 0.4514, "step": 27360 }, { "epoch": 2.7414233485250663, "grad_norm": 2.4249660968780518, "learning_rate": 2.129480375101638e-05, "loss": 0.4603, "step": 27370 }, { "epoch": 2.742425001252066, "grad_norm": 1.9688012599945068, "learning_rate": 2.1279213874307818e-05, "loss": 0.4447, "step": 27380 }, { "epoch": 2.7434266539790655, "grad_norm": 2.5289130210876465, "learning_rate": 2.126362547714022e-05, "loss": 0.4885, "step": 27390 }, { "epoch": 2.744428306706065, "grad_norm": 1.826878309249878, "learning_rate": 2.1248038565712175e-05, "loss": 0.4917, "step": 27400 }, { "epoch": 2.7454299594330647, "grad_norm": 2.649292230606079, "learning_rate": 2.123245314622171e-05, "loss": 0.4552, "step": 27410 }, { "epoch": 2.7464316121600643, "grad_norm": 3.107866048812866, "learning_rate": 2.1216869224866226e-05, "loss": 0.4791, "step": 27420 }, { "epoch": 2.7474332648870634, "grad_norm": 2.3746352195739746, "learning_rate": 2.120128680784255e-05, "loss": 0.4838, "step": 27430 }, { "epoch": 2.7484349176140634, "grad_norm": 1.829545497894287, "learning_rate": 2.118570590134691e-05, "loss": 0.4865, "step": 27440 }, { "epoch": 2.7494365703410626, "grad_norm": 1.9630305767059326, "learning_rate": 2.1170126511574927e-05, "loss": 0.4979, "step": 27450 }, { "epoch": 2.750438223068062, "grad_norm": 2.2091236114501953, "learning_rate": 2.115454864472162e-05, "loss": 0.5135, "step": 27460 }, { "epoch": 2.7514398757950618, "grad_norm": 2.330111265182495, "learning_rate": 2.1138972306981398e-05, "loss": 0.4824, "step": 27470 }, { "epoch": 2.7524415285220614, "grad_norm": 2.7530877590179443, "learning_rate": 2.1123397504548087e-05, "loss": 0.4567, "step": 27480 }, { "epoch": 2.753443181249061, "grad_norm": 2.259148597717285, "learning_rate": 2.1107824243614865e-05, "loss": 0.4989, "step": 27490 }, { "epoch": 2.7544448339760605, "grad_norm": 1.6459332704544067, "learning_rate": 2.109225253037431e-05, "loss": 0.4808, "step": 27500 }, { "epoch": 2.75544648670306, "grad_norm": 2.2297933101654053, "learning_rate": 2.107668237101841e-05, "loss": 0.4954, "step": 27510 }, { "epoch": 2.7564481394300597, "grad_norm": 2.1366751194000244, "learning_rate": 2.106111377173851e-05, "loss": 0.4906, "step": 27520 }, { "epoch": 2.7574497921570593, "grad_norm": 2.337979555130005, "learning_rate": 2.104554673872532e-05, "loss": 0.5064, "step": 27530 }, { "epoch": 2.7584514448840585, "grad_norm": 2.3288912773132324, "learning_rate": 2.102998127816897e-05, "loss": 0.4822, "step": 27540 }, { "epoch": 2.7594530976110585, "grad_norm": 2.4183363914489746, "learning_rate": 2.1014417396258936e-05, "loss": 0.5117, "step": 27550 }, { "epoch": 2.7604547503380577, "grad_norm": 2.034332752227783, "learning_rate": 2.0998855099184058e-05, "loss": 0.4364, "step": 27560 }, { "epoch": 2.7614564030650572, "grad_norm": 2.2769012451171875, "learning_rate": 2.0983294393132575e-05, "loss": 0.4577, "step": 27570 }, { "epoch": 2.762458055792057, "grad_norm": 2.3711085319519043, "learning_rate": 2.0967735284292065e-05, "loss": 0.5053, "step": 27580 }, { "epoch": 2.7634597085190564, "grad_norm": 1.7482993602752686, "learning_rate": 2.09521777788495e-05, "loss": 0.4199, "step": 27590 }, { "epoch": 2.764461361246056, "grad_norm": 1.9056187868118286, "learning_rate": 2.0936621882991186e-05, "loss": 0.4214, "step": 27600 }, { "epoch": 2.7654630139730556, "grad_norm": 1.977642297744751, "learning_rate": 2.0921067602902804e-05, "loss": 0.4373, "step": 27610 }, { "epoch": 2.766464666700055, "grad_norm": 2.014859199523926, "learning_rate": 2.0905514944769382e-05, "loss": 0.4806, "step": 27620 }, { "epoch": 2.767466319427055, "grad_norm": 3.8795015811920166, "learning_rate": 2.0889963914775333e-05, "loss": 0.4404, "step": 27630 }, { "epoch": 2.7684679721540544, "grad_norm": 2.2615413665771484, "learning_rate": 2.0874414519104375e-05, "loss": 0.5025, "step": 27640 }, { "epoch": 2.7694696248810535, "grad_norm": 3.039480209350586, "learning_rate": 2.085886676393961e-05, "loss": 0.4841, "step": 27650 }, { "epoch": 2.7704712776080536, "grad_norm": 2.475024700164795, "learning_rate": 2.084332065546349e-05, "loss": 0.5154, "step": 27660 }, { "epoch": 2.7714729303350527, "grad_norm": 2.402113437652588, "learning_rate": 2.082777619985778e-05, "loss": 0.4713, "step": 27670 }, { "epoch": 2.7724745830620523, "grad_norm": 1.7097605466842651, "learning_rate": 2.0812233403303625e-05, "loss": 0.4631, "step": 27680 }, { "epoch": 2.773476235789052, "grad_norm": 2.578178644180298, "learning_rate": 2.079669227198149e-05, "loss": 0.4864, "step": 27690 }, { "epoch": 2.7744778885160515, "grad_norm": 2.374117374420166, "learning_rate": 2.0781152812071187e-05, "loss": 0.4303, "step": 27700 }, { "epoch": 2.775479541243051, "grad_norm": 2.412297010421753, "learning_rate": 2.0765615029751843e-05, "loss": 0.5018, "step": 27710 }, { "epoch": 2.7764811939700507, "grad_norm": 2.166706085205078, "learning_rate": 2.075007893120195e-05, "loss": 0.4195, "step": 27720 }, { "epoch": 2.7774828466970503, "grad_norm": 2.3247227668762207, "learning_rate": 2.0734544522599298e-05, "loss": 0.4862, "step": 27730 }, { "epoch": 2.77848449942405, "grad_norm": 2.5825328826904297, "learning_rate": 2.0719011810121043e-05, "loss": 0.5212, "step": 27740 }, { "epoch": 2.7794861521510494, "grad_norm": 3.512326955795288, "learning_rate": 2.070348079994363e-05, "loss": 0.5204, "step": 27750 }, { "epoch": 2.7804878048780486, "grad_norm": 2.1720283031463623, "learning_rate": 2.0687951498242833e-05, "loss": 0.4784, "step": 27760 }, { "epoch": 2.781489457605048, "grad_norm": 2.6881589889526367, "learning_rate": 2.067242391119378e-05, "loss": 0.4718, "step": 27770 }, { "epoch": 2.7824911103320478, "grad_norm": 1.799422025680542, "learning_rate": 2.0656898044970866e-05, "loss": 0.4398, "step": 27780 }, { "epoch": 2.7834927630590474, "grad_norm": 2.7559547424316406, "learning_rate": 2.064137390574784e-05, "loss": 0.5155, "step": 27790 }, { "epoch": 2.784494415786047, "grad_norm": 2.2045767307281494, "learning_rate": 2.0625851499697747e-05, "loss": 0.5234, "step": 27800 }, { "epoch": 2.7854960685130465, "grad_norm": 3.2407376766204834, "learning_rate": 2.0610330832992966e-05, "loss": 0.514, "step": 27810 }, { "epoch": 2.786497721240046, "grad_norm": 1.689829707145691, "learning_rate": 2.0594811911805145e-05, "loss": 0.3743, "step": 27820 }, { "epoch": 2.7874993739670457, "grad_norm": 2.45869517326355, "learning_rate": 2.0579294742305263e-05, "loss": 0.5228, "step": 27830 }, { "epoch": 2.7885010266940453, "grad_norm": 2.170257806777954, "learning_rate": 2.0563779330663614e-05, "loss": 0.4958, "step": 27840 }, { "epoch": 2.7895026794210445, "grad_norm": 2.5842323303222656, "learning_rate": 2.0548265683049763e-05, "loss": 0.5162, "step": 27850 }, { "epoch": 2.7905043321480445, "grad_norm": 2.090120315551758, "learning_rate": 2.0532753805632588e-05, "loss": 0.4321, "step": 27860 }, { "epoch": 2.7915059848750436, "grad_norm": 1.8928121328353882, "learning_rate": 2.051724370458027e-05, "loss": 0.4296, "step": 27870 }, { "epoch": 2.7925076376020432, "grad_norm": 2.0915029048919678, "learning_rate": 2.050173538606028e-05, "loss": 0.4516, "step": 27880 }, { "epoch": 2.793509290329043, "grad_norm": 2.43007230758667, "learning_rate": 2.0486228856239366e-05, "loss": 0.4801, "step": 27890 }, { "epoch": 2.7945109430560424, "grad_norm": 1.7025607824325562, "learning_rate": 2.0470724121283584e-05, "loss": 0.5191, "step": 27900 }, { "epoch": 2.795512595783042, "grad_norm": 2.6428115367889404, "learning_rate": 2.0455221187358258e-05, "loss": 0.4427, "step": 27910 }, { "epoch": 2.7965142485100416, "grad_norm": 2.2131521701812744, "learning_rate": 2.043972006062803e-05, "loss": 0.5153, "step": 27920 }, { "epoch": 2.797515901237041, "grad_norm": 2.5547220706939697, "learning_rate": 2.0424220747256777e-05, "loss": 0.4719, "step": 27930 }, { "epoch": 2.798517553964041, "grad_norm": 2.3055248260498047, "learning_rate": 2.040872325340769e-05, "loss": 0.4596, "step": 27940 }, { "epoch": 2.7995192066910404, "grad_norm": 2.9861836433410645, "learning_rate": 2.039322758524322e-05, "loss": 0.4723, "step": 27950 }, { "epoch": 2.8005208594180395, "grad_norm": 1.8417468070983887, "learning_rate": 2.0377733748925082e-05, "loss": 0.5091, "step": 27960 }, { "epoch": 2.8015225121450396, "grad_norm": 2.0634090900421143, "learning_rate": 2.0362241750614298e-05, "loss": 0.4419, "step": 27970 }, { "epoch": 2.8025241648720387, "grad_norm": 1.7758311033248901, "learning_rate": 2.034675159647112e-05, "loss": 0.5149, "step": 27980 }, { "epoch": 2.8035258175990383, "grad_norm": 2.2538533210754395, "learning_rate": 2.0331263292655106e-05, "loss": 0.5028, "step": 27990 }, { "epoch": 2.804527470326038, "grad_norm": 2.3173768520355225, "learning_rate": 2.0315776845325038e-05, "loss": 0.4044, "step": 28000 }, { "epoch": 2.8055291230530375, "grad_norm": 1.9004391431808472, "learning_rate": 2.030029226063898e-05, "loss": 0.4568, "step": 28010 }, { "epoch": 2.806530775780037, "grad_norm": 2.0229578018188477, "learning_rate": 2.028480954475427e-05, "loss": 0.4042, "step": 28020 }, { "epoch": 2.8075324285070367, "grad_norm": 2.288827657699585, "learning_rate": 2.0269328703827468e-05, "loss": 0.4943, "step": 28030 }, { "epoch": 2.8085340812340363, "grad_norm": 2.060798168182373, "learning_rate": 2.0253849744014405e-05, "loss": 0.4906, "step": 28040 }, { "epoch": 2.809535733961036, "grad_norm": 1.997622013092041, "learning_rate": 2.0238372671470186e-05, "loss": 0.4681, "step": 28050 }, { "epoch": 2.8105373866880354, "grad_norm": 2.37577748298645, "learning_rate": 2.0222897492349137e-05, "loss": 0.4847, "step": 28060 }, { "epoch": 2.8115390394150346, "grad_norm": 2.299891471862793, "learning_rate": 2.0207424212804822e-05, "loss": 0.4673, "step": 28070 }, { "epoch": 2.8125406921420346, "grad_norm": 1.9214786291122437, "learning_rate": 2.0191952838990093e-05, "loss": 0.4824, "step": 28080 }, { "epoch": 2.8135423448690338, "grad_norm": 2.1179656982421875, "learning_rate": 2.0176483377056997e-05, "loss": 0.5231, "step": 28090 }, { "epoch": 2.8145439975960334, "grad_norm": 1.6549978256225586, "learning_rate": 2.0161015833156877e-05, "loss": 0.5406, "step": 28100 }, { "epoch": 2.815545650323033, "grad_norm": 1.342764973640442, "learning_rate": 2.0145550213440233e-05, "loss": 0.3921, "step": 28110 }, { "epoch": 2.8165473030500325, "grad_norm": 2.5125224590301514, "learning_rate": 2.0130086524056873e-05, "loss": 0.476, "step": 28120 }, { "epoch": 2.817548955777032, "grad_norm": 2.1776862144470215, "learning_rate": 2.011462477115581e-05, "loss": 0.5087, "step": 28130 }, { "epoch": 2.8185506085040317, "grad_norm": 2.026686668395996, "learning_rate": 2.009916496088527e-05, "loss": 0.461, "step": 28140 }, { "epoch": 2.8195522612310313, "grad_norm": 2.4303576946258545, "learning_rate": 2.008370709939274e-05, "loss": 0.4916, "step": 28150 }, { "epoch": 2.820553913958031, "grad_norm": 2.582199811935425, "learning_rate": 2.0068251192824904e-05, "loss": 0.4763, "step": 28160 }, { "epoch": 2.8215555666850305, "grad_norm": 2.5607872009277344, "learning_rate": 2.00527972473277e-05, "loss": 0.5032, "step": 28170 }, { "epoch": 2.8225572194120296, "grad_norm": 2.378352165222168, "learning_rate": 2.003734526904624e-05, "loss": 0.4109, "step": 28180 }, { "epoch": 2.8235588721390297, "grad_norm": 1.7307512760162354, "learning_rate": 2.0021895264124892e-05, "loss": 0.4661, "step": 28190 }, { "epoch": 2.824560524866029, "grad_norm": 2.8087072372436523, "learning_rate": 2.000644723870723e-05, "loss": 0.4496, "step": 28200 }, { "epoch": 2.8255621775930284, "grad_norm": 1.670059323310852, "learning_rate": 1.9991001198936043e-05, "loss": 0.4691, "step": 28210 }, { "epoch": 2.826563830320028, "grad_norm": 2.1640727519989014, "learning_rate": 1.9975557150953307e-05, "loss": 0.4691, "step": 28220 }, { "epoch": 2.8275654830470276, "grad_norm": 2.104672431945801, "learning_rate": 1.996011510090024e-05, "loss": 0.4978, "step": 28230 }, { "epoch": 2.828567135774027, "grad_norm": 2.842073917388916, "learning_rate": 1.994467505491725e-05, "loss": 0.4459, "step": 28240 }, { "epoch": 2.8295687885010268, "grad_norm": 2.6502578258514404, "learning_rate": 1.9929237019143937e-05, "loss": 0.4711, "step": 28250 }, { "epoch": 2.8305704412280264, "grad_norm": 1.7367525100708008, "learning_rate": 1.9913800999719122e-05, "loss": 0.4424, "step": 28260 }, { "epoch": 2.8315720939550255, "grad_norm": 1.9925302267074585, "learning_rate": 1.989836700278081e-05, "loss": 0.4808, "step": 28270 }, { "epoch": 2.8325737466820256, "grad_norm": 2.3260669708251953, "learning_rate": 1.988293503446623e-05, "loss": 0.401, "step": 28280 }, { "epoch": 2.8335753994090247, "grad_norm": 2.675114154815674, "learning_rate": 1.9867505100911744e-05, "loss": 0.4942, "step": 28290 }, { "epoch": 2.8345770521360243, "grad_norm": 2.485140085220337, "learning_rate": 1.985207720825296e-05, "loss": 0.5136, "step": 28300 }, { "epoch": 2.835578704863024, "grad_norm": 2.7628536224365234, "learning_rate": 1.9836651362624666e-05, "loss": 0.4945, "step": 28310 }, { "epoch": 2.8365803575900235, "grad_norm": 2.1196231842041016, "learning_rate": 1.9821227570160804e-05, "loss": 0.5113, "step": 28320 }, { "epoch": 2.837582010317023, "grad_norm": 1.7910830974578857, "learning_rate": 1.9805805836994546e-05, "loss": 0.4488, "step": 28330 }, { "epoch": 2.8385836630440227, "grad_norm": 2.0061752796173096, "learning_rate": 1.9790386169258197e-05, "loss": 0.4598, "step": 28340 }, { "epoch": 2.8395853157710222, "grad_norm": 2.4097883701324463, "learning_rate": 1.9774968573083294e-05, "loss": 0.4771, "step": 28350 }, { "epoch": 2.840586968498022, "grad_norm": 1.9780919551849365, "learning_rate": 1.97595530546005e-05, "loss": 0.5228, "step": 28360 }, { "epoch": 2.8415886212250214, "grad_norm": 2.0265541076660156, "learning_rate": 1.9744139619939673e-05, "loss": 0.4618, "step": 28370 }, { "epoch": 2.8425902739520206, "grad_norm": 2.245145082473755, "learning_rate": 1.972872827522985e-05, "loss": 0.5279, "step": 28380 }, { "epoch": 2.8435919266790206, "grad_norm": 2.6057050228118896, "learning_rate": 1.9713319026599235e-05, "loss": 0.4666, "step": 28390 }, { "epoch": 2.8445935794060198, "grad_norm": 1.8463797569274902, "learning_rate": 1.9697911880175178e-05, "loss": 0.4472, "step": 28400 }, { "epoch": 2.8455952321330193, "grad_norm": 2.043485403060913, "learning_rate": 1.9682506842084218e-05, "loss": 0.4854, "step": 28410 }, { "epoch": 2.846596884860019, "grad_norm": 2.281378746032715, "learning_rate": 1.966710391845205e-05, "loss": 0.499, "step": 28420 }, { "epoch": 2.8475985375870185, "grad_norm": 1.9262113571166992, "learning_rate": 1.9651703115403514e-05, "loss": 0.5275, "step": 28430 }, { "epoch": 2.848600190314018, "grad_norm": 1.9217075109481812, "learning_rate": 1.9636304439062613e-05, "loss": 0.4581, "step": 28440 }, { "epoch": 2.8496018430410177, "grad_norm": 2.031644105911255, "learning_rate": 1.9620907895552515e-05, "loss": 0.4833, "step": 28450 }, { "epoch": 2.8506034957680173, "grad_norm": 1.8989890813827515, "learning_rate": 1.9605513490995543e-05, "loss": 0.4247, "step": 28460 }, { "epoch": 2.851605148495017, "grad_norm": 2.3920555114746094, "learning_rate": 1.9590121231513137e-05, "loss": 0.5115, "step": 28470 }, { "epoch": 2.8526068012220165, "grad_norm": 2.1428468227386475, "learning_rate": 1.9574731123225922e-05, "loss": 0.5196, "step": 28480 }, { "epoch": 2.8536084539490156, "grad_norm": 1.9174599647521973, "learning_rate": 1.9559343172253645e-05, "loss": 0.5146, "step": 28490 }, { "epoch": 2.8546101066760157, "grad_norm": 1.9081904888153076, "learning_rate": 1.9543957384715213e-05, "loss": 0.4258, "step": 28500 }, { "epoch": 2.855611759403015, "grad_norm": 2.5541326999664307, "learning_rate": 1.952857376672865e-05, "loss": 0.5465, "step": 28510 }, { "epoch": 2.8566134121300144, "grad_norm": 2.1741418838500977, "learning_rate": 1.951319232441113e-05, "loss": 0.5138, "step": 28520 }, { "epoch": 2.857615064857014, "grad_norm": 2.374300479888916, "learning_rate": 1.9497813063878977e-05, "loss": 0.5121, "step": 28530 }, { "epoch": 2.8586167175840136, "grad_norm": 2.0950210094451904, "learning_rate": 1.948243599124761e-05, "loss": 0.4595, "step": 28540 }, { "epoch": 2.859618370311013, "grad_norm": 1.7163071632385254, "learning_rate": 1.94670611126316e-05, "loss": 0.5161, "step": 28550 }, { "epoch": 2.8606200230380128, "grad_norm": 2.6274149417877197, "learning_rate": 1.945168843414466e-05, "loss": 0.4492, "step": 28560 }, { "epoch": 2.8616216757650124, "grad_norm": 2.2445178031921387, "learning_rate": 1.9436317961899612e-05, "loss": 0.4481, "step": 28570 }, { "epoch": 2.862623328492012, "grad_norm": 2.0394294261932373, "learning_rate": 1.9420949702008384e-05, "loss": 0.4574, "step": 28580 }, { "epoch": 2.8636249812190115, "grad_norm": 2.9116296768188477, "learning_rate": 1.9405583660582062e-05, "loss": 0.4636, "step": 28590 }, { "epoch": 2.8646266339460107, "grad_norm": 2.4173495769500732, "learning_rate": 1.9390219843730825e-05, "loss": 0.4435, "step": 28600 }, { "epoch": 2.8656282866730107, "grad_norm": 2.1900784969329834, "learning_rate": 1.9374858257563968e-05, "loss": 0.4313, "step": 28610 }, { "epoch": 2.86662993940001, "grad_norm": 2.1137681007385254, "learning_rate": 1.9359498908189893e-05, "loss": 0.4938, "step": 28620 }, { "epoch": 2.8676315921270095, "grad_norm": 2.0387117862701416, "learning_rate": 1.934414180171614e-05, "loss": 0.4846, "step": 28630 }, { "epoch": 2.868633244854009, "grad_norm": 2.3030261993408203, "learning_rate": 1.9328786944249347e-05, "loss": 0.444, "step": 28640 }, { "epoch": 2.8696348975810086, "grad_norm": 2.0840537548065186, "learning_rate": 1.9313434341895224e-05, "loss": 0.4349, "step": 28650 }, { "epoch": 2.8706365503080082, "grad_norm": 2.0024921894073486, "learning_rate": 1.929808400075863e-05, "loss": 0.4305, "step": 28660 }, { "epoch": 2.871638203035008, "grad_norm": 2.2334063053131104, "learning_rate": 1.9282735926943492e-05, "loss": 0.4807, "step": 28670 }, { "epoch": 2.8726398557620074, "grad_norm": 2.153905153274536, "learning_rate": 1.9267390126552876e-05, "loss": 0.4767, "step": 28680 }, { "epoch": 2.873641508489007, "grad_norm": 1.7628467082977295, "learning_rate": 1.925204660568889e-05, "loss": 0.4862, "step": 28690 }, { "epoch": 2.8746431612160066, "grad_norm": 2.5055811405181885, "learning_rate": 1.923670537045277e-05, "loss": 0.5051, "step": 28700 }, { "epoch": 2.8756448139430058, "grad_norm": 2.421543836593628, "learning_rate": 1.9221366426944845e-05, "loss": 0.4746, "step": 28710 }, { "epoch": 2.876646466670006, "grad_norm": 2.5517852306365967, "learning_rate": 1.9206029781264513e-05, "loss": 0.4647, "step": 28720 }, { "epoch": 2.877648119397005, "grad_norm": 1.869573712348938, "learning_rate": 1.919069543951027e-05, "loss": 0.4866, "step": 28730 }, { "epoch": 2.8786497721240045, "grad_norm": 2.2313554286956787, "learning_rate": 1.91753634077797e-05, "loss": 0.4412, "step": 28740 }, { "epoch": 2.879651424851004, "grad_norm": 2.8452532291412354, "learning_rate": 1.916003369216947e-05, "loss": 0.5655, "step": 28750 }, { "epoch": 2.8806530775780037, "grad_norm": 2.1433467864990234, "learning_rate": 1.91447062987753e-05, "loss": 0.4176, "step": 28760 }, { "epoch": 2.8816547303050033, "grad_norm": 1.7829129695892334, "learning_rate": 1.9129381233692014e-05, "loss": 0.4787, "step": 28770 }, { "epoch": 2.882656383032003, "grad_norm": 2.395498275756836, "learning_rate": 1.911405850301351e-05, "loss": 0.4389, "step": 28780 }, { "epoch": 2.8836580357590025, "grad_norm": 1.7302987575531006, "learning_rate": 1.909873811283275e-05, "loss": 0.459, "step": 28790 }, { "epoch": 2.8846596884860016, "grad_norm": 2.646073341369629, "learning_rate": 1.9083420069241747e-05, "loss": 0.4666, "step": 28800 }, { "epoch": 2.8856613412130017, "grad_norm": 1.9419676065444946, "learning_rate": 1.9068104378331615e-05, "loss": 0.4968, "step": 28810 }, { "epoch": 2.886662993940001, "grad_norm": 1.7777568101882935, "learning_rate": 1.905279104619252e-05, "loss": 0.4824, "step": 28820 }, { "epoch": 2.8876646466670004, "grad_norm": 1.9487704038619995, "learning_rate": 1.903748007891367e-05, "loss": 0.5306, "step": 28830 }, { "epoch": 2.888666299394, "grad_norm": 2.5456743240356445, "learning_rate": 1.902217148258336e-05, "loss": 0.4403, "step": 28840 }, { "epoch": 2.8896679521209996, "grad_norm": 3.1180431842803955, "learning_rate": 1.9006865263288926e-05, "loss": 0.531, "step": 28850 }, { "epoch": 2.890669604847999, "grad_norm": 3.0304436683654785, "learning_rate": 1.8991561427116777e-05, "loss": 0.5187, "step": 28860 }, { "epoch": 2.8916712575749988, "grad_norm": 2.2932333946228027, "learning_rate": 1.8976259980152348e-05, "loss": 0.4541, "step": 28870 }, { "epoch": 2.8926729103019984, "grad_norm": 2.4942545890808105, "learning_rate": 1.8960960928480136e-05, "loss": 0.4583, "step": 28880 }, { "epoch": 2.893674563028998, "grad_norm": 1.900305986404419, "learning_rate": 1.8945664278183704e-05, "loss": 0.413, "step": 28890 }, { "epoch": 2.8946762157559975, "grad_norm": 2.3043212890625, "learning_rate": 1.8930370035345623e-05, "loss": 0.5049, "step": 28900 }, { "epoch": 2.8956778684829967, "grad_norm": 3.232800006866455, "learning_rate": 1.8915078206047535e-05, "loss": 0.4768, "step": 28910 }, { "epoch": 2.8966795212099967, "grad_norm": 1.8843035697937012, "learning_rate": 1.889978879637012e-05, "loss": 0.4959, "step": 28920 }, { "epoch": 2.897681173936996, "grad_norm": 2.1841742992401123, "learning_rate": 1.8884501812393086e-05, "loss": 0.4738, "step": 28930 }, { "epoch": 2.8986828266639955, "grad_norm": 2.26435923576355, "learning_rate": 1.8869217260195178e-05, "loss": 0.4499, "step": 28940 }, { "epoch": 2.899684479390995, "grad_norm": 2.054779529571533, "learning_rate": 1.885393514585417e-05, "loss": 0.4213, "step": 28950 }, { "epoch": 2.9006861321179946, "grad_norm": 2.1258368492126465, "learning_rate": 1.883865547544688e-05, "loss": 0.5051, "step": 28960 }, { "epoch": 2.9016877848449942, "grad_norm": 1.691569447517395, "learning_rate": 1.882337825504916e-05, "loss": 0.4256, "step": 28970 }, { "epoch": 2.902689437571994, "grad_norm": 2.8432114124298096, "learning_rate": 1.880810349073585e-05, "loss": 0.512, "step": 28980 }, { "epoch": 2.9036910902989934, "grad_norm": 2.043947696685791, "learning_rate": 1.8792831188580856e-05, "loss": 0.5151, "step": 28990 }, { "epoch": 2.904692743025993, "grad_norm": 1.4768218994140625, "learning_rate": 1.8777561354657087e-05, "loss": 0.4742, "step": 29000 }, { "epoch": 2.9056943957529926, "grad_norm": 2.128981590270996, "learning_rate": 1.8762293995036457e-05, "loss": 0.5125, "step": 29010 }, { "epoch": 2.9066960484799917, "grad_norm": 2.0752673149108887, "learning_rate": 1.874702911578992e-05, "loss": 0.5301, "step": 29020 }, { "epoch": 2.907697701206992, "grad_norm": 2.5276637077331543, "learning_rate": 1.873176672298743e-05, "loss": 0.4494, "step": 29030 }, { "epoch": 2.908699353933991, "grad_norm": 2.664680004119873, "learning_rate": 1.871650682269797e-05, "loss": 0.4918, "step": 29040 }, { "epoch": 2.9097010066609905, "grad_norm": 2.063596725463867, "learning_rate": 1.87012494209895e-05, "loss": 0.4775, "step": 29050 }, { "epoch": 2.91070265938799, "grad_norm": 1.8587756156921387, "learning_rate": 1.8685994523929005e-05, "loss": 0.4373, "step": 29060 }, { "epoch": 2.9117043121149897, "grad_norm": 2.288957118988037, "learning_rate": 1.8670742137582486e-05, "loss": 0.5238, "step": 29070 }, { "epoch": 2.9127059648419893, "grad_norm": 1.6254380941390991, "learning_rate": 1.8655492268014922e-05, "loss": 0.4024, "step": 29080 }, { "epoch": 2.913707617568989, "grad_norm": 1.8339122533798218, "learning_rate": 1.864024492129031e-05, "loss": 0.429, "step": 29090 }, { "epoch": 2.9147092702959885, "grad_norm": 2.6196937561035156, "learning_rate": 1.8625000103471622e-05, "loss": 0.5356, "step": 29100 }, { "epoch": 2.915710923022988, "grad_norm": 2.130065679550171, "learning_rate": 1.8609757820620856e-05, "loss": 0.4308, "step": 29110 }, { "epoch": 2.9167125757499877, "grad_norm": 2.1206037998199463, "learning_rate": 1.8594518078798976e-05, "loss": 0.4566, "step": 29120 }, { "epoch": 2.917714228476987, "grad_norm": 2.1794984340667725, "learning_rate": 1.8579280884065935e-05, "loss": 0.4251, "step": 29130 }, { "epoch": 2.918715881203987, "grad_norm": 1.4547064304351807, "learning_rate": 1.8564046242480697e-05, "loss": 0.4447, "step": 29140 }, { "epoch": 2.919717533930986, "grad_norm": 2.497053623199463, "learning_rate": 1.8548814160101197e-05, "loss": 0.4754, "step": 29150 }, { "epoch": 2.9207191866579856, "grad_norm": 2.3862662315368652, "learning_rate": 1.8533584642984334e-05, "loss": 0.5176, "step": 29160 }, { "epoch": 2.921720839384985, "grad_norm": 2.6100847721099854, "learning_rate": 1.8518357697186017e-05, "loss": 0.4517, "step": 29170 }, { "epoch": 2.9227224921119848, "grad_norm": 1.9027708768844604, "learning_rate": 1.8503133328761123e-05, "loss": 0.4236, "step": 29180 }, { "epoch": 2.9237241448389844, "grad_norm": 2.5158753395080566, "learning_rate": 1.8487911543763486e-05, "loss": 0.4196, "step": 29190 }, { "epoch": 2.924725797565984, "grad_norm": 2.278364896774292, "learning_rate": 1.847269234824594e-05, "loss": 0.5142, "step": 29200 }, { "epoch": 2.9257274502929835, "grad_norm": 2.325521469116211, "learning_rate": 1.8457475748260268e-05, "loss": 0.524, "step": 29210 }, { "epoch": 2.926729103019983, "grad_norm": 2.00109601020813, "learning_rate": 1.844226174985725e-05, "loss": 0.4382, "step": 29220 }, { "epoch": 2.9277307557469827, "grad_norm": 2.5737013816833496, "learning_rate": 1.8427050359086586e-05, "loss": 0.4711, "step": 29230 }, { "epoch": 2.928732408473982, "grad_norm": 2.1871094703674316, "learning_rate": 1.8411841581996966e-05, "loss": 0.4493, "step": 29240 }, { "epoch": 2.9297340612009815, "grad_norm": 2.6685447692871094, "learning_rate": 1.8396635424636056e-05, "loss": 0.4532, "step": 29250 }, { "epoch": 2.930735713927981, "grad_norm": 2.19049334526062, "learning_rate": 1.838143189305046e-05, "loss": 0.5213, "step": 29260 }, { "epoch": 2.9317373666549806, "grad_norm": 2.1203505992889404, "learning_rate": 1.836623099328573e-05, "loss": 0.5022, "step": 29270 }, { "epoch": 2.9327390193819802, "grad_norm": 2.115341901779175, "learning_rate": 1.8351032731386385e-05, "loss": 0.4569, "step": 29280 }, { "epoch": 2.93374067210898, "grad_norm": 2.712913990020752, "learning_rate": 1.833583711339591e-05, "loss": 0.4849, "step": 29290 }, { "epoch": 2.9347423248359794, "grad_norm": 2.8189609050750732, "learning_rate": 1.8320644145356697e-05, "loss": 0.5019, "step": 29300 }, { "epoch": 2.935743977562979, "grad_norm": 1.6259757280349731, "learning_rate": 1.830545383331012e-05, "loss": 0.4915, "step": 29310 }, { "epoch": 2.9367456302899786, "grad_norm": 2.498462677001953, "learning_rate": 1.8290266183296483e-05, "loss": 0.4389, "step": 29320 }, { "epoch": 2.9377472830169777, "grad_norm": 2.082543134689331, "learning_rate": 1.8275081201355044e-05, "loss": 0.4655, "step": 29330 }, { "epoch": 2.9387489357439778, "grad_norm": 2.135136842727661, "learning_rate": 1.8259898893523976e-05, "loss": 0.4485, "step": 29340 }, { "epoch": 2.939750588470977, "grad_norm": 2.071990489959717, "learning_rate": 1.824471926584041e-05, "loss": 0.4787, "step": 29350 }, { "epoch": 2.9407522411979765, "grad_norm": 1.6802020072937012, "learning_rate": 1.822954232434041e-05, "loss": 0.4796, "step": 29360 }, { "epoch": 2.941753893924976, "grad_norm": 1.772538661956787, "learning_rate": 1.8214368075058946e-05, "loss": 0.4682, "step": 29370 }, { "epoch": 2.9427555466519757, "grad_norm": 2.284114122390747, "learning_rate": 1.8199196524029956e-05, "loss": 0.4882, "step": 29380 }, { "epoch": 2.9437571993789753, "grad_norm": 2.2266476154327393, "learning_rate": 1.8184027677286275e-05, "loss": 0.494, "step": 29390 }, { "epoch": 2.944758852105975, "grad_norm": 3.315500497817993, "learning_rate": 1.8168861540859693e-05, "loss": 0.5365, "step": 29400 }, { "epoch": 2.9457605048329745, "grad_norm": 1.9478274583816528, "learning_rate": 1.815369812078088e-05, "loss": 0.4552, "step": 29410 }, { "epoch": 2.946762157559974, "grad_norm": 2.8757264614105225, "learning_rate": 1.8138537423079463e-05, "loss": 0.5344, "step": 29420 }, { "epoch": 2.9477638102869737, "grad_norm": 2.388357400894165, "learning_rate": 1.8123379453783962e-05, "loss": 0.5292, "step": 29430 }, { "epoch": 2.948765463013973, "grad_norm": 2.107295036315918, "learning_rate": 1.8108224218921838e-05, "loss": 0.4461, "step": 29440 }, { "epoch": 2.949767115740973, "grad_norm": 2.5148425102233887, "learning_rate": 1.809307172451944e-05, "loss": 0.4258, "step": 29450 }, { "epoch": 2.950768768467972, "grad_norm": 2.1600284576416016, "learning_rate": 1.8077921976602025e-05, "loss": 0.5051, "step": 29460 }, { "epoch": 2.9517704211949716, "grad_norm": 1.8847415447235107, "learning_rate": 1.8062774981193796e-05, "loss": 0.4117, "step": 29470 }, { "epoch": 2.952772073921971, "grad_norm": 1.3785723447799683, "learning_rate": 1.8047630744317817e-05, "loss": 0.4374, "step": 29480 }, { "epoch": 2.9537737266489708, "grad_norm": 3.6868884563446045, "learning_rate": 1.8032489271996073e-05, "loss": 0.5149, "step": 29490 }, { "epoch": 2.9547753793759703, "grad_norm": 2.474139451980591, "learning_rate": 1.801735057024946e-05, "loss": 0.4437, "step": 29500 }, { "epoch": 2.95577703210297, "grad_norm": 2.2893331050872803, "learning_rate": 1.8002214645097755e-05, "loss": 0.4841, "step": 29510 }, { "epoch": 2.9567786848299695, "grad_norm": 1.97756826877594, "learning_rate": 1.7987081502559634e-05, "loss": 0.4465, "step": 29520 }, { "epoch": 2.957780337556969, "grad_norm": 2.1581039428710938, "learning_rate": 1.7971951148652676e-05, "loss": 0.4425, "step": 29530 }, { "epoch": 2.9587819902839687, "grad_norm": 1.6784934997558594, "learning_rate": 1.7956823589393345e-05, "loss": 0.4253, "step": 29540 }, { "epoch": 2.959783643010968, "grad_norm": 1.9420301914215088, "learning_rate": 1.7941698830797005e-05, "loss": 0.4639, "step": 29550 }, { "epoch": 2.960785295737968, "grad_norm": 2.0235326290130615, "learning_rate": 1.7926576878877876e-05, "loss": 0.5391, "step": 29560 }, { "epoch": 2.961786948464967, "grad_norm": 1.8747906684875488, "learning_rate": 1.791145773964909e-05, "loss": 0.4593, "step": 29570 }, { "epoch": 2.9627886011919666, "grad_norm": 2.1775217056274414, "learning_rate": 1.7896341419122665e-05, "loss": 0.4645, "step": 29580 }, { "epoch": 2.9637902539189662, "grad_norm": 2.069146156311035, "learning_rate": 1.788122792330946e-05, "loss": 0.4522, "step": 29590 }, { "epoch": 2.964791906645966, "grad_norm": 2.7413077354431152, "learning_rate": 1.786611725821926e-05, "loss": 0.4479, "step": 29600 }, { "epoch": 2.9657935593729654, "grad_norm": 2.3349380493164062, "learning_rate": 1.785100942986068e-05, "loss": 0.4827, "step": 29610 }, { "epoch": 2.966795212099965, "grad_norm": 1.798128604888916, "learning_rate": 1.783590444424125e-05, "loss": 0.4739, "step": 29620 }, { "epoch": 2.9677968648269646, "grad_norm": 3.001051425933838, "learning_rate": 1.782080230736733e-05, "loss": 0.448, "step": 29630 }, { "epoch": 2.968798517553964, "grad_norm": 2.069577693939209, "learning_rate": 1.7805703025244163e-05, "loss": 0.378, "step": 29640 }, { "epoch": 2.9698001702809638, "grad_norm": 1.8276804685592651, "learning_rate": 1.779060660387588e-05, "loss": 0.4602, "step": 29650 }, { "epoch": 2.970801823007963, "grad_norm": 2.161472797393799, "learning_rate": 1.7775513049265427e-05, "loss": 0.454, "step": 29660 }, { "epoch": 2.971803475734963, "grad_norm": 2.3388967514038086, "learning_rate": 1.7760422367414643e-05, "loss": 0.4632, "step": 29670 }, { "epoch": 2.972805128461962, "grad_norm": 2.109787702560425, "learning_rate": 1.774533456432423e-05, "loss": 0.4374, "step": 29680 }, { "epoch": 2.9738067811889617, "grad_norm": 2.1075479984283447, "learning_rate": 1.7730249645993725e-05, "loss": 0.4088, "step": 29690 }, { "epoch": 2.9748084339159613, "grad_norm": 2.297384262084961, "learning_rate": 1.7715167618421512e-05, "loss": 0.5332, "step": 29700 }, { "epoch": 2.975810086642961, "grad_norm": 2.0056192874908447, "learning_rate": 1.7700088487604857e-05, "loss": 0.4708, "step": 29710 }, { "epoch": 2.9768117393699605, "grad_norm": 2.1155338287353516, "learning_rate": 1.7685012259539847e-05, "loss": 0.4678, "step": 29720 }, { "epoch": 2.97781339209696, "grad_norm": 2.3071138858795166, "learning_rate": 1.7669938940221438e-05, "loss": 0.4392, "step": 29730 }, { "epoch": 2.9788150448239596, "grad_norm": 2.3638877868652344, "learning_rate": 1.7654868535643397e-05, "loss": 0.4035, "step": 29740 }, { "epoch": 2.9798166975509592, "grad_norm": 2.3334412574768066, "learning_rate": 1.763980105179835e-05, "loss": 0.494, "step": 29750 }, { "epoch": 2.980818350277959, "grad_norm": 2.2591774463653564, "learning_rate": 1.762473649467779e-05, "loss": 0.3932, "step": 29760 }, { "epoch": 2.981820003004958, "grad_norm": 2.066215991973877, "learning_rate": 1.7609674870271985e-05, "loss": 0.4833, "step": 29770 }, { "epoch": 2.9828216557319576, "grad_norm": 1.8637421131134033, "learning_rate": 1.7594616184570085e-05, "loss": 0.4515, "step": 29780 }, { "epoch": 2.983823308458957, "grad_norm": 2.29803729057312, "learning_rate": 1.7579560443560045e-05, "loss": 0.5153, "step": 29790 }, { "epoch": 2.9848249611859567, "grad_norm": 2.6893725395202637, "learning_rate": 1.7564507653228685e-05, "loss": 0.4788, "step": 29800 }, { "epoch": 2.9858266139129563, "grad_norm": 2.405869483947754, "learning_rate": 1.7549457819561603e-05, "loss": 0.4965, "step": 29810 }, { "epoch": 2.986828266639956, "grad_norm": 2.2893760204315186, "learning_rate": 1.7534410948543253e-05, "loss": 0.4852, "step": 29820 }, { "epoch": 2.9878299193669555, "grad_norm": 2.5346555709838867, "learning_rate": 1.751936704615691e-05, "loss": 0.52, "step": 29830 }, { "epoch": 2.988831572093955, "grad_norm": 2.542680025100708, "learning_rate": 1.7504326118384658e-05, "loss": 0.433, "step": 29840 }, { "epoch": 2.9898332248209547, "grad_norm": 3.150050163269043, "learning_rate": 1.7489288171207394e-05, "loss": 0.5064, "step": 29850 }, { "epoch": 2.990834877547954, "grad_norm": 1.9536211490631104, "learning_rate": 1.7474253210604847e-05, "loss": 0.4579, "step": 29860 }, { "epoch": 2.991836530274954, "grad_norm": 2.1609601974487305, "learning_rate": 1.7459221242555553e-05, "loss": 0.4633, "step": 29870 }, { "epoch": 2.992838183001953, "grad_norm": 1.8260406255722046, "learning_rate": 1.744419227303684e-05, "loss": 0.4563, "step": 29880 }, { "epoch": 2.9938398357289526, "grad_norm": 2.1514317989349365, "learning_rate": 1.742916630802487e-05, "loss": 0.4483, "step": 29890 }, { "epoch": 2.994841488455952, "grad_norm": 2.2385141849517822, "learning_rate": 1.7414143353494585e-05, "loss": 0.4668, "step": 29900 }, { "epoch": 2.995843141182952, "grad_norm": 2.0614023208618164, "learning_rate": 1.739912341541977e-05, "loss": 0.4083, "step": 29910 }, { "epoch": 2.9968447939099514, "grad_norm": 2.2477142810821533, "learning_rate": 1.7384106499772954e-05, "loss": 0.5019, "step": 29920 }, { "epoch": 2.997846446636951, "grad_norm": 1.8166592121124268, "learning_rate": 1.736909261252551e-05, "loss": 0.4157, "step": 29930 }, { "epoch": 2.9988480993639506, "grad_norm": 1.802172303199768, "learning_rate": 1.735408175964759e-05, "loss": 0.4273, "step": 29940 }, { "epoch": 2.99984975209095, "grad_norm": 2.223151445388794, "learning_rate": 1.733907394710813e-05, "loss": 0.3838, "step": 29950 }, { "epoch": 3.0008013221815997, "grad_norm": 1.856826901435852, "learning_rate": 1.7324069180874874e-05, "loss": 0.3971, "step": 29960 }, { "epoch": 3.0018029749085993, "grad_norm": 2.1584417819976807, "learning_rate": 1.730906746691435e-05, "loss": 0.4797, "step": 29970 }, { "epoch": 3.002804627635599, "grad_norm": 2.594388484954834, "learning_rate": 1.729406881119187e-05, "loss": 0.4686, "step": 29980 }, { "epoch": 3.0038062803625984, "grad_norm": 2.0008304119110107, "learning_rate": 1.7279073219671525e-05, "loss": 0.4361, "step": 29990 }, { "epoch": 3.004807933089598, "grad_norm": 2.1979317665100098, "learning_rate": 1.7264080698316186e-05, "loss": 0.4022, "step": 30000 }, { "epoch": 3.004807933089598, "eval_bleu": 0.3025730108532601, "eval_loss": 0.5179290175437927, "eval_rouge1": 0.6867038326183534, "eval_rouge2": 0.5271660350662968, "eval_rougeL": 0.6468106582725407, "eval_runtime": 82898.3122, "eval_samples_per_second": 0.214, "eval_steps_per_second": 0.027, "eval_wer": 0.9895713443561743, "step": 30000 }, { "epoch": 3.005809585816597, "grad_norm": 1.4881905317306519, "learning_rate": 1.7249091253087528e-05, "loss": 0.4422, "step": 30010 }, { "epoch": 3.0068112385435968, "grad_norm": 2.210646390914917, "learning_rate": 1.7234104889945983e-05, "loss": 0.435, "step": 30020 }, { "epoch": 3.0078128912705964, "grad_norm": 2.2528607845306396, "learning_rate": 1.7219121614850737e-05, "loss": 0.4465, "step": 30030 }, { "epoch": 3.008814543997596, "grad_norm": 1.7737597227096558, "learning_rate": 1.720414143375979e-05, "loss": 0.4228, "step": 30040 }, { "epoch": 3.0098161967245955, "grad_norm": 2.0847411155700684, "learning_rate": 1.7189164352629895e-05, "loss": 0.3812, "step": 30050 }, { "epoch": 3.010817849451595, "grad_norm": 2.101858615875244, "learning_rate": 1.7174190377416545e-05, "loss": 0.4121, "step": 30060 }, { "epoch": 3.0118195021785947, "grad_norm": 2.346134662628174, "learning_rate": 1.7159219514074042e-05, "loss": 0.4031, "step": 30070 }, { "epoch": 3.0128211549055943, "grad_norm": 2.2245075702667236, "learning_rate": 1.714425176855542e-05, "loss": 0.3885, "step": 30080 }, { "epoch": 3.013822807632594, "grad_norm": 2.2482717037200928, "learning_rate": 1.71292871468125e-05, "loss": 0.4431, "step": 30090 }, { "epoch": 3.0148244603595935, "grad_norm": 2.3442859649658203, "learning_rate": 1.7114325654795823e-05, "loss": 0.4431, "step": 30100 }, { "epoch": 3.015826113086593, "grad_norm": 2.165501356124878, "learning_rate": 1.7099367298454723e-05, "loss": 0.4719, "step": 30110 }, { "epoch": 3.0168277658135922, "grad_norm": 2.0025136470794678, "learning_rate": 1.708441208373727e-05, "loss": 0.4177, "step": 30120 }, { "epoch": 3.017829418540592, "grad_norm": 1.7397187948226929, "learning_rate": 1.7069460016590272e-05, "loss": 0.4252, "step": 30130 }, { "epoch": 3.0188310712675914, "grad_norm": 1.9813027381896973, "learning_rate": 1.7054511102959315e-05, "loss": 0.4546, "step": 30140 }, { "epoch": 3.019832723994591, "grad_norm": 1.9185618162155151, "learning_rate": 1.70395653487887e-05, "loss": 0.4019, "step": 30150 }, { "epoch": 3.0208343767215906, "grad_norm": 1.8481942415237427, "learning_rate": 1.7024622760021508e-05, "loss": 0.3715, "step": 30160 }, { "epoch": 3.02183602944859, "grad_norm": 2.2238407135009766, "learning_rate": 1.7009683342599526e-05, "loss": 0.5105, "step": 30170 }, { "epoch": 3.02283768217559, "grad_norm": 2.2952382564544678, "learning_rate": 1.6994747102463292e-05, "loss": 0.4362, "step": 30180 }, { "epoch": 3.0238393349025894, "grad_norm": 2.0249791145324707, "learning_rate": 1.6979814045552095e-05, "loss": 0.4306, "step": 30190 }, { "epoch": 3.024840987629589, "grad_norm": 2.3426079750061035, "learning_rate": 1.6964884177803942e-05, "loss": 0.4174, "step": 30200 }, { "epoch": 3.0258426403565886, "grad_norm": 2.315671920776367, "learning_rate": 1.694995750515556e-05, "loss": 0.4406, "step": 30210 }, { "epoch": 3.0268442930835877, "grad_norm": 2.683622121810913, "learning_rate": 1.6935034033542443e-05, "loss": 0.4369, "step": 30220 }, { "epoch": 3.0278459458105873, "grad_norm": 2.1116085052490234, "learning_rate": 1.692011376889879e-05, "loss": 0.4556, "step": 30230 }, { "epoch": 3.028847598537587, "grad_norm": 2.1275312900543213, "learning_rate": 1.6905196717157505e-05, "loss": 0.4808, "step": 30240 }, { "epoch": 3.0298492512645865, "grad_norm": 1.7960904836654663, "learning_rate": 1.6890282884250247e-05, "loss": 0.4133, "step": 30250 }, { "epoch": 3.030850903991586, "grad_norm": 1.9978631734848022, "learning_rate": 1.687537227610739e-05, "loss": 0.4625, "step": 30260 }, { "epoch": 3.0318525567185857, "grad_norm": 2.269608497619629, "learning_rate": 1.6860464898658013e-05, "loss": 0.4151, "step": 30270 }, { "epoch": 3.0328542094455853, "grad_norm": 2.2099547386169434, "learning_rate": 1.6845560757829903e-05, "loss": 0.4048, "step": 30280 }, { "epoch": 3.033855862172585, "grad_norm": 2.1496224403381348, "learning_rate": 1.6830659859549593e-05, "loss": 0.3884, "step": 30290 }, { "epoch": 3.0348575148995844, "grad_norm": 2.702627420425415, "learning_rate": 1.681576220974229e-05, "loss": 0.4569, "step": 30300 }, { "epoch": 3.035859167626584, "grad_norm": 2.0695064067840576, "learning_rate": 1.6800867814331943e-05, "loss": 0.4137, "step": 30310 }, { "epoch": 3.0368608203535836, "grad_norm": 2.1617956161499023, "learning_rate": 1.678597667924118e-05, "loss": 0.4098, "step": 30320 }, { "epoch": 3.0378624730805828, "grad_norm": 2.5029983520507812, "learning_rate": 1.6771088810391332e-05, "loss": 0.4801, "step": 30330 }, { "epoch": 3.0388641258075824, "grad_norm": 1.9497227668762207, "learning_rate": 1.6756204213702467e-05, "loss": 0.4275, "step": 30340 }, { "epoch": 3.039865778534582, "grad_norm": 1.6746639013290405, "learning_rate": 1.6741322895093304e-05, "loss": 0.4753, "step": 30350 }, { "epoch": 3.0408674312615815, "grad_norm": 1.430365800857544, "learning_rate": 1.6726444860481283e-05, "loss": 0.3356, "step": 30360 }, { "epoch": 3.041869083988581, "grad_norm": 1.6194040775299072, "learning_rate": 1.6711570115782542e-05, "loss": 0.3804, "step": 30370 }, { "epoch": 3.0428707367155807, "grad_norm": 2.530052423477173, "learning_rate": 1.6696698666911913e-05, "loss": 0.4464, "step": 30380 }, { "epoch": 3.0438723894425803, "grad_norm": 1.8114992380142212, "learning_rate": 1.668183051978289e-05, "loss": 0.4295, "step": 30390 }, { "epoch": 3.04487404216958, "grad_norm": 2.182450532913208, "learning_rate": 1.666696568030769e-05, "loss": 0.3924, "step": 30400 }, { "epoch": 3.0458756948965795, "grad_norm": 1.8115897178649902, "learning_rate": 1.665210415439719e-05, "loss": 0.4182, "step": 30410 }, { "epoch": 3.046877347623579, "grad_norm": 1.8325234651565552, "learning_rate": 1.6637245947960955e-05, "loss": 0.4205, "step": 30420 }, { "epoch": 3.0478790003505782, "grad_norm": 2.4362452030181885, "learning_rate": 1.662239106690723e-05, "loss": 0.432, "step": 30430 }, { "epoch": 3.048880653077578, "grad_norm": 2.317858934402466, "learning_rate": 1.6607539517142952e-05, "loss": 0.4567, "step": 30440 }, { "epoch": 3.0498823058045774, "grad_norm": 2.2887580394744873, "learning_rate": 1.6592691304573722e-05, "loss": 0.4084, "step": 30450 }, { "epoch": 3.050883958531577, "grad_norm": 2.171954393386841, "learning_rate": 1.6577846435103794e-05, "loss": 0.4411, "step": 30460 }, { "epoch": 3.0518856112585766, "grad_norm": 2.362718105316162, "learning_rate": 1.6563004914636132e-05, "loss": 0.4254, "step": 30470 }, { "epoch": 3.052887263985576, "grad_norm": 2.31545090675354, "learning_rate": 1.6548166749072332e-05, "loss": 0.4505, "step": 30480 }, { "epoch": 3.053888916712576, "grad_norm": 1.8873096704483032, "learning_rate": 1.6533331944312694e-05, "loss": 0.3795, "step": 30490 }, { "epoch": 3.0548905694395754, "grad_norm": 2.9692976474761963, "learning_rate": 1.651850050625614e-05, "loss": 0.418, "step": 30500 }, { "epoch": 3.055892222166575, "grad_norm": 2.100770950317383, "learning_rate": 1.650367244080028e-05, "loss": 0.4898, "step": 30510 }, { "epoch": 3.0568938748935746, "grad_norm": 2.251727819442749, "learning_rate": 1.6488847753841382e-05, "loss": 0.4811, "step": 30520 }, { "epoch": 3.057895527620574, "grad_norm": 1.89990234375, "learning_rate": 1.6474026451274356e-05, "loss": 0.4516, "step": 30530 }, { "epoch": 3.0588971803475733, "grad_norm": 2.172435998916626, "learning_rate": 1.6459208538992772e-05, "loss": 0.3815, "step": 30540 }, { "epoch": 3.059898833074573, "grad_norm": 2.072516441345215, "learning_rate": 1.6444394022888868e-05, "loss": 0.4441, "step": 30550 }, { "epoch": 3.0609004858015725, "grad_norm": 2.7983038425445557, "learning_rate": 1.6429582908853515e-05, "loss": 0.3968, "step": 30560 }, { "epoch": 3.061902138528572, "grad_norm": 1.6153335571289062, "learning_rate": 1.6414775202776224e-05, "loss": 0.4167, "step": 30570 }, { "epoch": 3.0629037912555717, "grad_norm": 1.913484811782837, "learning_rate": 1.6399970910545165e-05, "loss": 0.4488, "step": 30580 }, { "epoch": 3.0639054439825713, "grad_norm": 1.8409531116485596, "learning_rate": 1.6385170038047153e-05, "loss": 0.4134, "step": 30590 }, { "epoch": 3.064907096709571, "grad_norm": 2.3591482639312744, "learning_rate": 1.637037259116764e-05, "loss": 0.3989, "step": 30600 }, { "epoch": 3.0659087494365704, "grad_norm": 2.223154306411743, "learning_rate": 1.6355578575790696e-05, "loss": 0.4391, "step": 30610 }, { "epoch": 3.06691040216357, "grad_norm": 1.8484059572219849, "learning_rate": 1.6340787997799058e-05, "loss": 0.4701, "step": 30620 }, { "epoch": 3.0679120548905696, "grad_norm": 2.000924825668335, "learning_rate": 1.6326000863074084e-05, "loss": 0.4881, "step": 30630 }, { "epoch": 3.068913707617569, "grad_norm": 2.186718702316284, "learning_rate": 1.6311217177495744e-05, "loss": 0.4545, "step": 30640 }, { "epoch": 3.0699153603445684, "grad_norm": 2.2570459842681885, "learning_rate": 1.6296436946942666e-05, "loss": 0.4912, "step": 30650 }, { "epoch": 3.070917013071568, "grad_norm": 1.8138389587402344, "learning_rate": 1.628166017729208e-05, "loss": 0.4317, "step": 30660 }, { "epoch": 3.0719186657985675, "grad_norm": 2.333362102508545, "learning_rate": 1.6266886874419874e-05, "loss": 0.4869, "step": 30670 }, { "epoch": 3.072920318525567, "grad_norm": 2.178196907043457, "learning_rate": 1.625211704420051e-05, "loss": 0.5721, "step": 30680 }, { "epoch": 3.0739219712525667, "grad_norm": 2.0177736282348633, "learning_rate": 1.62373506925071e-05, "loss": 0.4564, "step": 30690 }, { "epoch": 3.0749236239795663, "grad_norm": 2.001838207244873, "learning_rate": 1.622258782521137e-05, "loss": 0.4209, "step": 30700 }, { "epoch": 3.075925276706566, "grad_norm": 1.930259108543396, "learning_rate": 1.6207828448183653e-05, "loss": 0.4177, "step": 30710 }, { "epoch": 3.0769269294335655, "grad_norm": 2.031282663345337, "learning_rate": 1.6193072567292896e-05, "loss": 0.4197, "step": 30720 }, { "epoch": 3.077928582160565, "grad_norm": 1.463243007659912, "learning_rate": 1.6178320188406665e-05, "loss": 0.4557, "step": 30730 }, { "epoch": 3.0789302348875647, "grad_norm": 1.8603028059005737, "learning_rate": 1.6163571317391126e-05, "loss": 0.4094, "step": 30740 }, { "epoch": 3.079931887614564, "grad_norm": 2.061042547225952, "learning_rate": 1.6148825960111038e-05, "loss": 0.3696, "step": 30750 }, { "epoch": 3.0809335403415634, "grad_norm": 2.072138547897339, "learning_rate": 1.6134084122429778e-05, "loss": 0.4335, "step": 30760 }, { "epoch": 3.081935193068563, "grad_norm": 1.9868147373199463, "learning_rate": 1.611934581020933e-05, "loss": 0.4321, "step": 30770 }, { "epoch": 3.0829368457955626, "grad_norm": 2.325610399246216, "learning_rate": 1.610461102931027e-05, "loss": 0.4099, "step": 30780 }, { "epoch": 3.083938498522562, "grad_norm": 2.9362833499908447, "learning_rate": 1.6089879785591738e-05, "loss": 0.4579, "step": 30790 }, { "epoch": 3.084940151249562, "grad_norm": 1.9224374294281006, "learning_rate": 1.6075152084911526e-05, "loss": 0.4099, "step": 30800 }, { "epoch": 3.0859418039765614, "grad_norm": 2.3298308849334717, "learning_rate": 1.6060427933125983e-05, "loss": 0.4423, "step": 30810 }, { "epoch": 3.086943456703561, "grad_norm": 2.235973834991455, "learning_rate": 1.6045707336090026e-05, "loss": 0.4588, "step": 30820 }, { "epoch": 3.0879451094305606, "grad_norm": 2.449845790863037, "learning_rate": 1.6030990299657213e-05, "loss": 0.4757, "step": 30830 }, { "epoch": 3.08894676215756, "grad_norm": 1.9638694524765015, "learning_rate": 1.6016276829679633e-05, "loss": 0.3865, "step": 30840 }, { "epoch": 3.0899484148845597, "grad_norm": 1.989548921585083, "learning_rate": 1.6001566932008e-05, "loss": 0.393, "step": 30850 }, { "epoch": 3.090950067611559, "grad_norm": 2.4953360557556152, "learning_rate": 1.598686061249158e-05, "loss": 0.4132, "step": 30860 }, { "epoch": 3.0919517203385585, "grad_norm": 1.8345521688461304, "learning_rate": 1.597215787697821e-05, "loss": 0.4129, "step": 30870 }, { "epoch": 3.092953373065558, "grad_norm": 2.3891615867614746, "learning_rate": 1.5957458731314334e-05, "loss": 0.4426, "step": 30880 }, { "epoch": 3.0939550257925577, "grad_norm": 2.574930429458618, "learning_rate": 1.594276318134495e-05, "loss": 0.4353, "step": 30890 }, { "epoch": 3.0949566785195572, "grad_norm": 2.4506924152374268, "learning_rate": 1.5928071232913615e-05, "loss": 0.4643, "step": 30900 }, { "epoch": 3.095958331246557, "grad_norm": 3.072618246078491, "learning_rate": 1.591338289186247e-05, "loss": 0.4184, "step": 30910 }, { "epoch": 3.0969599839735564, "grad_norm": 2.3220643997192383, "learning_rate": 1.589869816403222e-05, "loss": 0.4504, "step": 30920 }, { "epoch": 3.097961636700556, "grad_norm": 1.746625542640686, "learning_rate": 1.588401705526213e-05, "loss": 0.4448, "step": 30930 }, { "epoch": 3.0989632894275556, "grad_norm": 2.5139360427856445, "learning_rate": 1.5869339571390012e-05, "loss": 0.4606, "step": 30940 }, { "epoch": 3.099964942154555, "grad_norm": 2.294275999069214, "learning_rate": 1.585466571825227e-05, "loss": 0.4509, "step": 30950 }, { "epoch": 3.1009665948815544, "grad_norm": 2.091242551803589, "learning_rate": 1.5839995501683842e-05, "loss": 0.3922, "step": 30960 }, { "epoch": 3.101968247608554, "grad_norm": 1.6841497421264648, "learning_rate": 1.5825328927518203e-05, "loss": 0.4624, "step": 30970 }, { "epoch": 3.1029699003355535, "grad_norm": 2.2461984157562256, "learning_rate": 1.5810666001587422e-05, "loss": 0.4055, "step": 30980 }, { "epoch": 3.103971553062553, "grad_norm": 2.257538318634033, "learning_rate": 1.579600672972209e-05, "loss": 0.4854, "step": 30990 }, { "epoch": 3.1049732057895527, "grad_norm": 2.6897525787353516, "learning_rate": 1.5781351117751336e-05, "loss": 0.4658, "step": 31000 }, { "epoch": 3.1059748585165523, "grad_norm": 2.213277816772461, "learning_rate": 1.5766699171502864e-05, "loss": 0.4478, "step": 31010 }, { "epoch": 3.106976511243552, "grad_norm": 2.556442975997925, "learning_rate": 1.575205089680289e-05, "loss": 0.385, "step": 31020 }, { "epoch": 3.1079781639705515, "grad_norm": 2.161545515060425, "learning_rate": 1.5737406299476202e-05, "loss": 0.3858, "step": 31030 }, { "epoch": 3.108979816697551, "grad_norm": 2.166633367538452, "learning_rate": 1.5722765385346088e-05, "loss": 0.4702, "step": 31040 }, { "epoch": 3.1099814694245507, "grad_norm": 1.7207388877868652, "learning_rate": 1.5708128160234398e-05, "loss": 0.4262, "step": 31050 }, { "epoch": 3.1109831221515503, "grad_norm": 2.28712797164917, "learning_rate": 1.569349462996151e-05, "loss": 0.4538, "step": 31060 }, { "epoch": 3.1119847748785494, "grad_norm": 2.292612075805664, "learning_rate": 1.567886480034634e-05, "loss": 0.41, "step": 31070 }, { "epoch": 3.112986427605549, "grad_norm": 2.4569859504699707, "learning_rate": 1.5664238677206303e-05, "loss": 0.4326, "step": 31080 }, { "epoch": 3.1139880803325486, "grad_norm": 2.032771110534668, "learning_rate": 1.5649616266357364e-05, "loss": 0.446, "step": 31090 }, { "epoch": 3.114989733059548, "grad_norm": 2.4700424671173096, "learning_rate": 1.563499757361403e-05, "loss": 0.4004, "step": 31100 }, { "epoch": 3.1159913857865478, "grad_norm": 2.9324538707733154, "learning_rate": 1.562038260478928e-05, "loss": 0.3842, "step": 31110 }, { "epoch": 3.1169930385135474, "grad_norm": 2.291475534439087, "learning_rate": 1.5605771365694653e-05, "loss": 0.3809, "step": 31120 }, { "epoch": 3.117994691240547, "grad_norm": 1.8152060508728027, "learning_rate": 1.5591163862140193e-05, "loss": 0.4574, "step": 31130 }, { "epoch": 3.1189963439675465, "grad_norm": 2.637399911880493, "learning_rate": 1.557656009993446e-05, "loss": 0.4402, "step": 31140 }, { "epoch": 3.119997996694546, "grad_norm": 2.726431131362915, "learning_rate": 1.5561960084884507e-05, "loss": 0.4582, "step": 31150 }, { "epoch": 3.1209996494215457, "grad_norm": 1.8519651889801025, "learning_rate": 1.5547363822795935e-05, "loss": 0.3868, "step": 31160 }, { "epoch": 3.1220013021485453, "grad_norm": 1.980515480041504, "learning_rate": 1.5532771319472824e-05, "loss": 0.4291, "step": 31170 }, { "epoch": 3.1230029548755445, "grad_norm": 2.1525003910064697, "learning_rate": 1.5518182580717756e-05, "loss": 0.4091, "step": 31180 }, { "epoch": 3.124004607602544, "grad_norm": 2.2667062282562256, "learning_rate": 1.5503597612331834e-05, "loss": 0.4454, "step": 31190 }, { "epoch": 3.1250062603295437, "grad_norm": 1.6090610027313232, "learning_rate": 1.5489016420114658e-05, "loss": 0.4377, "step": 31200 }, { "epoch": 3.1260079130565432, "grad_norm": 1.957115650177002, "learning_rate": 1.5474439009864328e-05, "loss": 0.4627, "step": 31210 }, { "epoch": 3.127009565783543, "grad_norm": 2.4358901977539062, "learning_rate": 1.545986538737742e-05, "loss": 0.4202, "step": 31220 }, { "epoch": 3.1280112185105424, "grad_norm": 2.5619661808013916, "learning_rate": 1.5445295558449024e-05, "loss": 0.4035, "step": 31230 }, { "epoch": 3.129012871237542, "grad_norm": 3.102004289627075, "learning_rate": 1.5430729528872722e-05, "loss": 0.4026, "step": 31240 }, { "epoch": 3.1300145239645416, "grad_norm": 1.8699795007705688, "learning_rate": 1.541616730444058e-05, "loss": 0.4918, "step": 31250 }, { "epoch": 3.131016176691541, "grad_norm": 1.6903408765792847, "learning_rate": 1.540160889094315e-05, "loss": 0.37, "step": 31260 }, { "epoch": 3.132017829418541, "grad_norm": 2.113809108734131, "learning_rate": 1.5387054294169455e-05, "loss": 0.4645, "step": 31270 }, { "epoch": 3.13301948214554, "grad_norm": 1.8396596908569336, "learning_rate": 1.5372503519907035e-05, "loss": 0.3981, "step": 31280 }, { "epoch": 3.1340211348725395, "grad_norm": 1.8806499242782593, "learning_rate": 1.5357956573941872e-05, "loss": 0.4145, "step": 31290 }, { "epoch": 3.135022787599539, "grad_norm": 1.8449625968933105, "learning_rate": 1.5343413462058453e-05, "loss": 0.4274, "step": 31300 }, { "epoch": 3.1360244403265387, "grad_norm": 2.39495849609375, "learning_rate": 1.532887419003973e-05, "loss": 0.4665, "step": 31310 }, { "epoch": 3.1370260930535383, "grad_norm": 2.1482601165771484, "learning_rate": 1.5314338763667132e-05, "loss": 0.4653, "step": 31320 }, { "epoch": 3.138027745780538, "grad_norm": 1.9213060140609741, "learning_rate": 1.529980718872054e-05, "loss": 0.4786, "step": 31330 }, { "epoch": 3.1390293985075375, "grad_norm": 1.8243101835250854, "learning_rate": 1.5285279470978335e-05, "loss": 0.3736, "step": 31340 }, { "epoch": 3.140031051234537, "grad_norm": 2.4560937881469727, "learning_rate": 1.5270755616217343e-05, "loss": 0.4372, "step": 31350 }, { "epoch": 3.1410327039615367, "grad_norm": 1.697628140449524, "learning_rate": 1.5256235630212864e-05, "loss": 0.3558, "step": 31360 }, { "epoch": 3.1420343566885363, "grad_norm": 1.9843086004257202, "learning_rate": 1.5241719518738646e-05, "loss": 0.4401, "step": 31370 }, { "epoch": 3.1430360094155354, "grad_norm": 2.132075786590576, "learning_rate": 1.5227207287566903e-05, "loss": 0.4759, "step": 31380 }, { "epoch": 3.144037662142535, "grad_norm": 1.9841755628585815, "learning_rate": 1.521269894246833e-05, "loss": 0.4012, "step": 31390 }, { "epoch": 3.1450393148695346, "grad_norm": 2.36376690864563, "learning_rate": 1.5198194489212025e-05, "loss": 0.4353, "step": 31400 }, { "epoch": 3.146040967596534, "grad_norm": 2.0222671031951904, "learning_rate": 1.5183693933565585e-05, "loss": 0.4259, "step": 31410 }, { "epoch": 3.1470426203235338, "grad_norm": 1.9244353771209717, "learning_rate": 1.5169197281295033e-05, "loss": 0.4191, "step": 31420 }, { "epoch": 3.1480442730505334, "grad_norm": 2.056471586227417, "learning_rate": 1.515470453816486e-05, "loss": 0.44, "step": 31430 }, { "epoch": 3.149045925777533, "grad_norm": 2.2160956859588623, "learning_rate": 1.514021570993798e-05, "loss": 0.4678, "step": 31440 }, { "epoch": 3.1500475785045325, "grad_norm": 2.4954745769500732, "learning_rate": 1.5125730802375754e-05, "loss": 0.4518, "step": 31450 }, { "epoch": 3.151049231231532, "grad_norm": 2.0969278812408447, "learning_rate": 1.511124982123801e-05, "loss": 0.4325, "step": 31460 }, { "epoch": 3.1520508839585317, "grad_norm": 2.045363664627075, "learning_rate": 1.5096772772282977e-05, "loss": 0.4275, "step": 31470 }, { "epoch": 3.1530525366855313, "grad_norm": 2.1558549404144287, "learning_rate": 1.5082299661267338e-05, "loss": 0.4799, "step": 31480 }, { "epoch": 3.1540541894125305, "grad_norm": 1.989071249961853, "learning_rate": 1.5067830493946222e-05, "loss": 0.444, "step": 31490 }, { "epoch": 3.15505584213953, "grad_norm": 1.6298166513442993, "learning_rate": 1.5053365276073176e-05, "loss": 0.4246, "step": 31500 }, { "epoch": 3.1560574948665296, "grad_norm": 1.942826747894287, "learning_rate": 1.5038904013400165e-05, "loss": 0.4383, "step": 31510 }, { "epoch": 3.1570591475935292, "grad_norm": 2.3768038749694824, "learning_rate": 1.502444671167761e-05, "loss": 0.4235, "step": 31520 }, { "epoch": 3.158060800320529, "grad_norm": 1.900918960571289, "learning_rate": 1.500999337665433e-05, "loss": 0.3984, "step": 31530 }, { "epoch": 3.1590624530475284, "grad_norm": 2.272731304168701, "learning_rate": 1.4995544014077595e-05, "loss": 0.4221, "step": 31540 }, { "epoch": 3.160064105774528, "grad_norm": 2.2893619537353516, "learning_rate": 1.4981098629693066e-05, "loss": 0.4546, "step": 31550 }, { "epoch": 3.1610657585015276, "grad_norm": 2.42533016204834, "learning_rate": 1.496665722924483e-05, "loss": 0.3829, "step": 31560 }, { "epoch": 3.162067411228527, "grad_norm": 2.7083005905151367, "learning_rate": 1.4952219818475422e-05, "loss": 0.4093, "step": 31570 }, { "epoch": 3.163069063955527, "grad_norm": 2.4388954639434814, "learning_rate": 1.4937786403125725e-05, "loss": 0.4436, "step": 31580 }, { "epoch": 3.1640707166825264, "grad_norm": 2.242216110229492, "learning_rate": 1.4923356988935094e-05, "loss": 0.4942, "step": 31590 }, { "epoch": 3.1650723694095255, "grad_norm": 2.0267419815063477, "learning_rate": 1.4908931581641267e-05, "loss": 0.4141, "step": 31600 }, { "epoch": 3.166074022136525, "grad_norm": 2.0003437995910645, "learning_rate": 1.48945101869804e-05, "loss": 0.4125, "step": 31610 }, { "epoch": 3.1670756748635247, "grad_norm": 2.4626035690307617, "learning_rate": 1.4880092810687034e-05, "loss": 0.3956, "step": 31620 }, { "epoch": 3.1680773275905243, "grad_norm": 2.163177967071533, "learning_rate": 1.4865679458494123e-05, "loss": 0.4299, "step": 31630 }, { "epoch": 3.169078980317524, "grad_norm": 2.259563684463501, "learning_rate": 1.485127013613303e-05, "loss": 0.4229, "step": 31640 }, { "epoch": 3.1700806330445235, "grad_norm": 1.6991698741912842, "learning_rate": 1.4836864849333509e-05, "loss": 0.4263, "step": 31650 }, { "epoch": 3.171082285771523, "grad_norm": 2.012312889099121, "learning_rate": 1.4822463603823694e-05, "loss": 0.4329, "step": 31660 }, { "epoch": 3.1720839384985227, "grad_norm": 1.6116600036621094, "learning_rate": 1.480806640533014e-05, "loss": 0.45, "step": 31670 }, { "epoch": 3.1730855912255223, "grad_norm": 1.7727469205856323, "learning_rate": 1.4793673259577773e-05, "loss": 0.4319, "step": 31680 }, { "epoch": 3.174087243952522, "grad_norm": 1.567363977432251, "learning_rate": 1.4779284172289904e-05, "loss": 0.4308, "step": 31690 }, { "epoch": 3.1750888966795214, "grad_norm": 2.1986780166625977, "learning_rate": 1.4764899149188256e-05, "loss": 0.4781, "step": 31700 }, { "epoch": 3.1760905494065206, "grad_norm": 2.068117380142212, "learning_rate": 1.4750518195992897e-05, "loss": 0.4237, "step": 31710 }, { "epoch": 3.17709220213352, "grad_norm": 2.2476770877838135, "learning_rate": 1.4736141318422331e-05, "loss": 0.478, "step": 31720 }, { "epoch": 3.1780938548605198, "grad_norm": 2.5276355743408203, "learning_rate": 1.4721768522193372e-05, "loss": 0.4347, "step": 31730 }, { "epoch": 3.1790955075875194, "grad_norm": 2.2223894596099854, "learning_rate": 1.4707399813021272e-05, "loss": 0.466, "step": 31740 }, { "epoch": 3.180097160314519, "grad_norm": 1.6248425245285034, "learning_rate": 1.4693035196619629e-05, "loss": 0.4342, "step": 31750 }, { "epoch": 3.1810988130415185, "grad_norm": 1.8279471397399902, "learning_rate": 1.4678674678700405e-05, "loss": 0.4708, "step": 31760 }, { "epoch": 3.182100465768518, "grad_norm": 2.691256523132324, "learning_rate": 1.4664318264973965e-05, "loss": 0.4571, "step": 31770 }, { "epoch": 3.1831021184955177, "grad_norm": 4.807773590087891, "learning_rate": 1.4649965961149003e-05, "loss": 0.4275, "step": 31780 }, { "epoch": 3.1841037712225173, "grad_norm": 1.9490879774093628, "learning_rate": 1.4635617772932625e-05, "loss": 0.4218, "step": 31790 }, { "epoch": 3.185105423949517, "grad_norm": 2.2792632579803467, "learning_rate": 1.462127370603025e-05, "loss": 0.4507, "step": 31800 }, { "epoch": 3.186107076676516, "grad_norm": 2.2887744903564453, "learning_rate": 1.4606933766145686e-05, "loss": 0.458, "step": 31810 }, { "epoch": 3.1871087294035156, "grad_norm": 2.0166826248168945, "learning_rate": 1.4592597958981102e-05, "loss": 0.4476, "step": 31820 }, { "epoch": 3.1881103821305152, "grad_norm": 1.9539391994476318, "learning_rate": 1.457826629023702e-05, "loss": 0.4931, "step": 31830 }, { "epoch": 3.189112034857515, "grad_norm": 2.3129167556762695, "learning_rate": 1.4563938765612304e-05, "loss": 0.4348, "step": 31840 }, { "epoch": 3.1901136875845144, "grad_norm": 2.1217124462127686, "learning_rate": 1.4549615390804183e-05, "loss": 0.4267, "step": 31850 }, { "epoch": 3.191115340311514, "grad_norm": 2.5243582725524902, "learning_rate": 1.453529617150824e-05, "loss": 0.4407, "step": 31860 }, { "epoch": 3.1921169930385136, "grad_norm": 2.3081367015838623, "learning_rate": 1.4520981113418391e-05, "loss": 0.5121, "step": 31870 }, { "epoch": 3.193118645765513, "grad_norm": 2.267855167388916, "learning_rate": 1.450667022222691e-05, "loss": 0.4379, "step": 31880 }, { "epoch": 3.1941202984925128, "grad_norm": 1.9463863372802734, "learning_rate": 1.44923635036244e-05, "loss": 0.4241, "step": 31890 }, { "epoch": 3.1951219512195124, "grad_norm": 1.9318593740463257, "learning_rate": 1.4478060963299825e-05, "loss": 0.4678, "step": 31900 }, { "epoch": 3.1961236039465115, "grad_norm": 2.170785665512085, "learning_rate": 1.4463762606940467e-05, "loss": 0.471, "step": 31910 }, { "epoch": 3.197125256673511, "grad_norm": 2.2391130924224854, "learning_rate": 1.4449468440231961e-05, "loss": 0.4823, "step": 31920 }, { "epoch": 3.1981269094005107, "grad_norm": 2.5374581813812256, "learning_rate": 1.4435178468858263e-05, "loss": 0.4299, "step": 31930 }, { "epoch": 3.1991285621275103, "grad_norm": 1.7519168853759766, "learning_rate": 1.4420892698501665e-05, "loss": 0.4503, "step": 31940 }, { "epoch": 3.20013021485451, "grad_norm": 2.36940860748291, "learning_rate": 1.4406611134842795e-05, "loss": 0.4593, "step": 31950 }, { "epoch": 3.2011318675815095, "grad_norm": 2.056506872177124, "learning_rate": 1.4392333783560602e-05, "loss": 0.4437, "step": 31960 }, { "epoch": 3.202133520308509, "grad_norm": 2.2406508922576904, "learning_rate": 1.4378060650332359e-05, "loss": 0.3915, "step": 31970 }, { "epoch": 3.2031351730355087, "grad_norm": 2.973459482192993, "learning_rate": 1.4363791740833666e-05, "loss": 0.4502, "step": 31980 }, { "epoch": 3.2041368257625082, "grad_norm": 2.4402339458465576, "learning_rate": 1.4349527060738435e-05, "loss": 0.4597, "step": 31990 }, { "epoch": 3.205138478489508, "grad_norm": 2.2443811893463135, "learning_rate": 1.43352666157189e-05, "loss": 0.4083, "step": 32000 }, { "epoch": 3.2061401312165074, "grad_norm": 1.9804131984710693, "learning_rate": 1.432101041144564e-05, "loss": 0.4083, "step": 32010 }, { "epoch": 3.2071417839435066, "grad_norm": 2.6847102642059326, "learning_rate": 1.4306758453587494e-05, "loss": 0.4574, "step": 32020 }, { "epoch": 3.208143436670506, "grad_norm": 2.2255048751831055, "learning_rate": 1.4292510747811633e-05, "loss": 0.3964, "step": 32030 }, { "epoch": 3.2091450893975058, "grad_norm": 1.9805270433425903, "learning_rate": 1.4278267299783576e-05, "loss": 0.454, "step": 32040 }, { "epoch": 3.2101467421245053, "grad_norm": 2.9171619415283203, "learning_rate": 1.4264028115167091e-05, "loss": 0.4639, "step": 32050 }, { "epoch": 3.211148394851505, "grad_norm": 1.7909626960754395, "learning_rate": 1.4249793199624284e-05, "loss": 0.4419, "step": 32060 }, { "epoch": 3.2121500475785045, "grad_norm": 1.8210331201553345, "learning_rate": 1.4235562558815544e-05, "loss": 0.3819, "step": 32070 }, { "epoch": 3.213151700305504, "grad_norm": 2.2666754722595215, "learning_rate": 1.4221336198399605e-05, "loss": 0.4234, "step": 32080 }, { "epoch": 3.2141533530325037, "grad_norm": 2.126728057861328, "learning_rate": 1.420711412403343e-05, "loss": 0.4262, "step": 32090 }, { "epoch": 3.2151550057595033, "grad_norm": 2.2342824935913086, "learning_rate": 1.419289634137232e-05, "loss": 0.4111, "step": 32100 }, { "epoch": 3.216156658486503, "grad_norm": 2.0711076259613037, "learning_rate": 1.417868285606988e-05, "loss": 0.4585, "step": 32110 }, { "epoch": 3.2171583112135025, "grad_norm": 2.6071414947509766, "learning_rate": 1.416447367377799e-05, "loss": 0.4229, "step": 32120 }, { "epoch": 3.2181599639405016, "grad_norm": 2.0745346546173096, "learning_rate": 1.4150268800146787e-05, "loss": 0.4607, "step": 32130 }, { "epoch": 3.2191616166675012, "grad_norm": 2.3919355869293213, "learning_rate": 1.4136068240824758e-05, "loss": 0.4789, "step": 32140 }, { "epoch": 3.220163269394501, "grad_norm": 1.9690043926239014, "learning_rate": 1.4121872001458641e-05, "loss": 0.4274, "step": 32150 }, { "epoch": 3.2211649221215004, "grad_norm": 2.3856241703033447, "learning_rate": 1.4107680087693425e-05, "loss": 0.3834, "step": 32160 }, { "epoch": 3.2221665748485, "grad_norm": 2.1940131187438965, "learning_rate": 1.4093492505172445e-05, "loss": 0.3788, "step": 32170 }, { "epoch": 3.2231682275754996, "grad_norm": 2.333587169647217, "learning_rate": 1.407930925953727e-05, "loss": 0.4461, "step": 32180 }, { "epoch": 3.224169880302499, "grad_norm": 2.39921236038208, "learning_rate": 1.4065130356427753e-05, "loss": 0.4476, "step": 32190 }, { "epoch": 3.2251715330294988, "grad_norm": 2.1098825931549072, "learning_rate": 1.4050955801482024e-05, "loss": 0.4647, "step": 32200 }, { "epoch": 3.2261731857564984, "grad_norm": 1.7223210334777832, "learning_rate": 1.4036785600336477e-05, "loss": 0.4261, "step": 32210 }, { "epoch": 3.227174838483498, "grad_norm": 1.8587815761566162, "learning_rate": 1.4022619758625793e-05, "loss": 0.4039, "step": 32220 }, { "epoch": 3.2281764912104975, "grad_norm": 2.4933252334594727, "learning_rate": 1.4008458281982879e-05, "loss": 0.4714, "step": 32230 }, { "epoch": 3.2291781439374967, "grad_norm": 1.8548504114151, "learning_rate": 1.399430117603896e-05, "loss": 0.4457, "step": 32240 }, { "epoch": 3.2301797966644963, "grad_norm": 1.866041660308838, "learning_rate": 1.3980148446423483e-05, "loss": 0.4801, "step": 32250 }, { "epoch": 3.231181449391496, "grad_norm": 2.160381317138672, "learning_rate": 1.3966000098764175e-05, "loss": 0.4553, "step": 32260 }, { "epoch": 3.2321831021184955, "grad_norm": 2.0052473545074463, "learning_rate": 1.3951856138687009e-05, "loss": 0.4554, "step": 32270 }, { "epoch": 3.233184754845495, "grad_norm": 2.251857280731201, "learning_rate": 1.3937716571816218e-05, "loss": 0.3904, "step": 32280 }, { "epoch": 3.2341864075724946, "grad_norm": 2.450507164001465, "learning_rate": 1.3923581403774294e-05, "loss": 0.4219, "step": 32290 }, { "epoch": 3.2351880602994942, "grad_norm": 2.243173599243164, "learning_rate": 1.3909450640181965e-05, "loss": 0.4448, "step": 32300 }, { "epoch": 3.236189713026494, "grad_norm": 2.0648858547210693, "learning_rate": 1.3895324286658224e-05, "loss": 0.471, "step": 32310 }, { "epoch": 3.2371913657534934, "grad_norm": 2.1174683570861816, "learning_rate": 1.38812023488203e-05, "loss": 0.4278, "step": 32320 }, { "epoch": 3.238193018480493, "grad_norm": 2.3641650676727295, "learning_rate": 1.3867084832283667e-05, "loss": 0.4653, "step": 32330 }, { "epoch": 3.239194671207492, "grad_norm": 1.988904356956482, "learning_rate": 1.3852971742662047e-05, "loss": 0.4166, "step": 32340 }, { "epoch": 3.2401963239344918, "grad_norm": 2.140962839126587, "learning_rate": 1.3838863085567395e-05, "loss": 0.4228, "step": 32350 }, { "epoch": 3.2411979766614913, "grad_norm": 2.70149564743042, "learning_rate": 1.3824758866609897e-05, "loss": 0.393, "step": 32360 }, { "epoch": 3.242199629388491, "grad_norm": 1.729416012763977, "learning_rate": 1.3810659091398012e-05, "loss": 0.5004, "step": 32370 }, { "epoch": 3.2432012821154905, "grad_norm": 1.8394402265548706, "learning_rate": 1.3796563765538376e-05, "loss": 0.4351, "step": 32380 }, { "epoch": 3.24420293484249, "grad_norm": 2.235522985458374, "learning_rate": 1.378247289463589e-05, "loss": 0.3838, "step": 32390 }, { "epoch": 3.2452045875694897, "grad_norm": 1.8229221105575562, "learning_rate": 1.376838648429367e-05, "loss": 0.3934, "step": 32400 }, { "epoch": 3.2462062402964893, "grad_norm": 1.658739686012268, "learning_rate": 1.3754304540113089e-05, "loss": 0.3869, "step": 32410 }, { "epoch": 3.247207893023489, "grad_norm": 2.5649220943450928, "learning_rate": 1.3740227067693697e-05, "loss": 0.4916, "step": 32420 }, { "epoch": 3.2482095457504885, "grad_norm": 1.8066800832748413, "learning_rate": 1.3726154072633285e-05, "loss": 0.456, "step": 32430 }, { "epoch": 3.2492111984774876, "grad_norm": 2.1143670082092285, "learning_rate": 1.37120855605279e-05, "loss": 0.4308, "step": 32440 }, { "epoch": 3.250212851204487, "grad_norm": 2.2256264686584473, "learning_rate": 1.3698021536971733e-05, "loss": 0.4165, "step": 32450 }, { "epoch": 3.251214503931487, "grad_norm": 1.9437214136123657, "learning_rate": 1.3683962007557247e-05, "loss": 0.447, "step": 32460 }, { "epoch": 3.2522161566584864, "grad_norm": 2.0992989540100098, "learning_rate": 1.3669906977875113e-05, "loss": 0.3604, "step": 32470 }, { "epoch": 3.253217809385486, "grad_norm": 2.455745220184326, "learning_rate": 1.3655856453514202e-05, "loss": 0.3967, "step": 32480 }, { "epoch": 3.2542194621124856, "grad_norm": 1.914459228515625, "learning_rate": 1.3641810440061564e-05, "loss": 0.4463, "step": 32490 }, { "epoch": 3.255221114839485, "grad_norm": 2.0907294750213623, "learning_rate": 1.3627768943102515e-05, "loss": 0.448, "step": 32500 }, { "epoch": 3.2562227675664848, "grad_norm": 2.3785226345062256, "learning_rate": 1.3613731968220539e-05, "loss": 0.4895, "step": 32510 }, { "epoch": 3.2572244202934844, "grad_norm": 2.042774200439453, "learning_rate": 1.3599699520997306e-05, "loss": 0.4133, "step": 32520 }, { "epoch": 3.258226073020484, "grad_norm": 2.2214324474334717, "learning_rate": 1.3585671607012735e-05, "loss": 0.3817, "step": 32530 }, { "epoch": 3.2592277257474835, "grad_norm": 2.654376983642578, "learning_rate": 1.3571648231844897e-05, "loss": 0.4512, "step": 32540 }, { "epoch": 3.2602293784744827, "grad_norm": 2.0407917499542236, "learning_rate": 1.3557629401070094e-05, "loss": 0.3947, "step": 32550 }, { "epoch": 3.2612310312014823, "grad_norm": 1.8769006729125977, "learning_rate": 1.3543615120262764e-05, "loss": 0.4184, "step": 32560 }, { "epoch": 3.262232683928482, "grad_norm": 1.9162296056747437, "learning_rate": 1.3529605394995613e-05, "loss": 0.3897, "step": 32570 }, { "epoch": 3.2632343366554815, "grad_norm": 2.0198333263397217, "learning_rate": 1.3515600230839478e-05, "loss": 0.4421, "step": 32580 }, { "epoch": 3.264235989382481, "grad_norm": 2.189434289932251, "learning_rate": 1.350159963336341e-05, "loss": 0.4541, "step": 32590 }, { "epoch": 3.2652376421094806, "grad_norm": 1.9533107280731201, "learning_rate": 1.3487603608134627e-05, "loss": 0.4527, "step": 32600 }, { "epoch": 3.2662392948364802, "grad_norm": 2.1534202098846436, "learning_rate": 1.347361216071854e-05, "loss": 0.4493, "step": 32610 }, { "epoch": 3.26724094756348, "grad_norm": 1.7690802812576294, "learning_rate": 1.345962529667874e-05, "loss": 0.5048, "step": 32620 }, { "epoch": 3.2682426002904794, "grad_norm": 2.1077520847320557, "learning_rate": 1.3445643021576987e-05, "loss": 0.43, "step": 32630 }, { "epoch": 3.269244253017479, "grad_norm": 2.1861956119537354, "learning_rate": 1.3431665340973223e-05, "loss": 0.4492, "step": 32640 }, { "epoch": 3.2702459057444786, "grad_norm": 2.0920753479003906, "learning_rate": 1.3417692260425564e-05, "loss": 0.4196, "step": 32650 }, { "epoch": 3.2712475584714777, "grad_norm": 2.3200623989105225, "learning_rate": 1.3403723785490286e-05, "loss": 0.4897, "step": 32660 }, { "epoch": 3.2722492111984773, "grad_norm": 2.933948516845703, "learning_rate": 1.338975992172185e-05, "loss": 0.4299, "step": 32670 }, { "epoch": 3.273250863925477, "grad_norm": 2.2343332767486572, "learning_rate": 1.3375800674672872e-05, "loss": 0.3738, "step": 32680 }, { "epoch": 3.2742525166524765, "grad_norm": 2.4421591758728027, "learning_rate": 1.3361846049894127e-05, "loss": 0.4757, "step": 32690 }, { "epoch": 3.275254169379476, "grad_norm": 1.903268575668335, "learning_rate": 1.3347896052934591e-05, "loss": 0.3908, "step": 32700 }, { "epoch": 3.2762558221064757, "grad_norm": 1.6958303451538086, "learning_rate": 1.3333950689341335e-05, "loss": 0.4063, "step": 32710 }, { "epoch": 3.2772574748334753, "grad_norm": 2.532978057861328, "learning_rate": 1.3320009964659624e-05, "loss": 0.4859, "step": 32720 }, { "epoch": 3.278259127560475, "grad_norm": 2.2470481395721436, "learning_rate": 1.3306073884432907e-05, "loss": 0.4377, "step": 32730 }, { "epoch": 3.2792607802874745, "grad_norm": 2.7446441650390625, "learning_rate": 1.3292142454202725e-05, "loss": 0.4472, "step": 32740 }, { "epoch": 3.280262433014474, "grad_norm": 2.225325107574463, "learning_rate": 1.327821567950881e-05, "loss": 0.4215, "step": 32750 }, { "epoch": 3.2812640857414737, "grad_norm": 2.3718440532684326, "learning_rate": 1.326429356588902e-05, "loss": 0.4337, "step": 32760 }, { "epoch": 3.282265738468473, "grad_norm": 1.878641963005066, "learning_rate": 1.3250376118879408e-05, "loss": 0.4516, "step": 32770 }, { "epoch": 3.2832673911954724, "grad_norm": 2.452392578125, "learning_rate": 1.32364633440141e-05, "loss": 0.4819, "step": 32780 }, { "epoch": 3.284269043922472, "grad_norm": 3.089430332183838, "learning_rate": 1.3222555246825407e-05, "loss": 0.4242, "step": 32790 }, { "epoch": 3.2852706966494716, "grad_norm": 1.9483718872070312, "learning_rate": 1.3208651832843796e-05, "loss": 0.4246, "step": 32800 }, { "epoch": 3.286272349376471, "grad_norm": 2.125678539276123, "learning_rate": 1.3194753107597824e-05, "loss": 0.4367, "step": 32810 }, { "epoch": 3.2872740021034708, "grad_norm": 2.1187121868133545, "learning_rate": 1.3180859076614205e-05, "loss": 0.4101, "step": 32820 }, { "epoch": 3.2882756548304704, "grad_norm": 1.9084290266036987, "learning_rate": 1.3166969745417813e-05, "loss": 0.3884, "step": 32830 }, { "epoch": 3.28927730755747, "grad_norm": 2.143211603164673, "learning_rate": 1.3153085119531624e-05, "loss": 0.4027, "step": 32840 }, { "epoch": 3.2902789602844695, "grad_norm": 1.8853137493133545, "learning_rate": 1.3139205204476726e-05, "loss": 0.4311, "step": 32850 }, { "epoch": 3.2912806130114687, "grad_norm": 2.1946933269500732, "learning_rate": 1.3125330005772385e-05, "loss": 0.4407, "step": 32860 }, { "epoch": 3.2922822657384687, "grad_norm": 2.14318585395813, "learning_rate": 1.3111459528935949e-05, "loss": 0.4017, "step": 32870 }, { "epoch": 3.293283918465468, "grad_norm": 2.319744110107422, "learning_rate": 1.3097593779482914e-05, "loss": 0.4307, "step": 32880 }, { "epoch": 3.2942855711924675, "grad_norm": 2.9636361598968506, "learning_rate": 1.3083732762926859e-05, "loss": 0.4675, "step": 32890 }, { "epoch": 3.295287223919467, "grad_norm": 2.6737558841705322, "learning_rate": 1.306987648477953e-05, "loss": 0.5051, "step": 32900 }, { "epoch": 3.2962888766464666, "grad_norm": 2.0904157161712646, "learning_rate": 1.3056024950550772e-05, "loss": 0.4611, "step": 32910 }, { "epoch": 3.2972905293734662, "grad_norm": 2.091607093811035, "learning_rate": 1.3042178165748507e-05, "loss": 0.3842, "step": 32920 }, { "epoch": 3.298292182100466, "grad_norm": 2.5052433013916016, "learning_rate": 1.3028336135878822e-05, "loss": 0.4381, "step": 32930 }, { "epoch": 3.2992938348274654, "grad_norm": 1.78681480884552, "learning_rate": 1.3014498866445887e-05, "loss": 0.4373, "step": 32940 }, { "epoch": 3.300295487554465, "grad_norm": 1.7345925569534302, "learning_rate": 1.3000666362951979e-05, "loss": 0.4163, "step": 32950 }, { "epoch": 3.3012971402814646, "grad_norm": 2.144805431365967, "learning_rate": 1.298683863089748e-05, "loss": 0.4375, "step": 32960 }, { "epoch": 3.3022987930084637, "grad_norm": 2.5673115253448486, "learning_rate": 1.2973015675780883e-05, "loss": 0.403, "step": 32970 }, { "epoch": 3.3033004457354633, "grad_norm": 1.9869693517684937, "learning_rate": 1.2959197503098775e-05, "loss": 0.4217, "step": 32980 }, { "epoch": 3.304302098462463, "grad_norm": 2.347975969314575, "learning_rate": 1.2945384118345838e-05, "loss": 0.4225, "step": 32990 }, { "epoch": 3.3053037511894625, "grad_norm": 2.203904390335083, "learning_rate": 1.2931575527014855e-05, "loss": 0.457, "step": 33000 }, { "epoch": 3.306305403916462, "grad_norm": 2.366241216659546, "learning_rate": 1.2917771734596707e-05, "loss": 0.4346, "step": 33010 }, { "epoch": 3.3073070566434617, "grad_norm": 2.1328248977661133, "learning_rate": 1.2903972746580361e-05, "loss": 0.4801, "step": 33020 }, { "epoch": 3.3083087093704613, "grad_norm": 1.9029438495635986, "learning_rate": 1.2890178568452871e-05, "loss": 0.4414, "step": 33030 }, { "epoch": 3.309310362097461, "grad_norm": 1.8686634302139282, "learning_rate": 1.2876389205699386e-05, "loss": 0.4772, "step": 33040 }, { "epoch": 3.3103120148244605, "grad_norm": 2.073631525039673, "learning_rate": 1.2862604663803118e-05, "loss": 0.544, "step": 33050 }, { "epoch": 3.31131366755146, "grad_norm": 1.7073830366134644, "learning_rate": 1.2848824948245423e-05, "loss": 0.4515, "step": 33060 }, { "epoch": 3.3123153202784597, "grad_norm": 2.569355010986328, "learning_rate": 1.2835050064505655e-05, "loss": 0.4384, "step": 33070 }, { "epoch": 3.313316973005459, "grad_norm": 1.8745815753936768, "learning_rate": 1.2821280018061305e-05, "loss": 0.4025, "step": 33080 }, { "epoch": 3.3143186257324584, "grad_norm": 2.1308634281158447, "learning_rate": 1.280751481438791e-05, "loss": 0.4674, "step": 33090 }, { "epoch": 3.315320278459458, "grad_norm": 2.4393298625946045, "learning_rate": 1.2793754458959109e-05, "loss": 0.4237, "step": 33100 }, { "epoch": 3.3163219311864576, "grad_norm": 1.8815350532531738, "learning_rate": 1.2779998957246586e-05, "loss": 0.502, "step": 33110 }, { "epoch": 3.317323583913457, "grad_norm": 2.3304734230041504, "learning_rate": 1.2766248314720102e-05, "loss": 0.4362, "step": 33120 }, { "epoch": 3.3183252366404568, "grad_norm": 2.8277807235717773, "learning_rate": 1.2752502536847516e-05, "loss": 0.4311, "step": 33130 }, { "epoch": 3.3193268893674563, "grad_norm": 1.8867182731628418, "learning_rate": 1.2738761629094703e-05, "loss": 0.403, "step": 33140 }, { "epoch": 3.320328542094456, "grad_norm": 2.2600784301757812, "learning_rate": 1.2725025596925622e-05, "loss": 0.4428, "step": 33150 }, { "epoch": 3.3213301948214555, "grad_norm": 2.057344436645508, "learning_rate": 1.2711294445802319e-05, "loss": 0.3904, "step": 33160 }, { "epoch": 3.322331847548455, "grad_norm": 2.2911007404327393, "learning_rate": 1.2697568181184872e-05, "loss": 0.4301, "step": 33170 }, { "epoch": 3.3233335002754547, "grad_norm": 2.3054580688476562, "learning_rate": 1.2683846808531396e-05, "loss": 0.4788, "step": 33180 }, { "epoch": 3.324335153002454, "grad_norm": 2.0127499103546143, "learning_rate": 1.2670130333298113e-05, "loss": 0.4351, "step": 33190 }, { "epoch": 3.3253368057294534, "grad_norm": 1.7581689357757568, "learning_rate": 1.2656418760939267e-05, "loss": 0.3516, "step": 33200 }, { "epoch": 3.326338458456453, "grad_norm": 2.3614418506622314, "learning_rate": 1.2642712096907145e-05, "loss": 0.4669, "step": 33210 }, { "epoch": 3.3273401111834526, "grad_norm": 2.0278165340423584, "learning_rate": 1.2629010346652082e-05, "loss": 0.3568, "step": 33220 }, { "epoch": 3.3283417639104522, "grad_norm": 2.4216668605804443, "learning_rate": 1.2615313515622496e-05, "loss": 0.417, "step": 33230 }, { "epoch": 3.329343416637452, "grad_norm": 2.564865827560425, "learning_rate": 1.260162160926482e-05, "loss": 0.418, "step": 33240 }, { "epoch": 3.3303450693644514, "grad_norm": 1.9982346296310425, "learning_rate": 1.2587934633023505e-05, "loss": 0.3987, "step": 33250 }, { "epoch": 3.331346722091451, "grad_norm": 1.943405032157898, "learning_rate": 1.2574252592341095e-05, "loss": 0.4683, "step": 33260 }, { "epoch": 3.3323483748184506, "grad_norm": 2.346015453338623, "learning_rate": 1.2560575492658145e-05, "loss": 0.4025, "step": 33270 }, { "epoch": 3.33335002754545, "grad_norm": 1.8255881071090698, "learning_rate": 1.2546903339413214e-05, "loss": 0.4333, "step": 33280 }, { "epoch": 3.3343516802724498, "grad_norm": 2.3527069091796875, "learning_rate": 1.2533236138042956e-05, "loss": 0.4259, "step": 33290 }, { "epoch": 3.335353332999449, "grad_norm": 1.7683374881744385, "learning_rate": 1.2519573893982014e-05, "loss": 0.3926, "step": 33300 }, { "epoch": 3.3363549857264485, "grad_norm": 1.8756691217422485, "learning_rate": 1.2505916612663071e-05, "loss": 0.4162, "step": 33310 }, { "epoch": 3.337356638453448, "grad_norm": 2.328777313232422, "learning_rate": 1.2492264299516837e-05, "loss": 0.4315, "step": 33320 }, { "epoch": 3.3383582911804477, "grad_norm": 2.066019058227539, "learning_rate": 1.2478616959972044e-05, "loss": 0.3843, "step": 33330 }, { "epoch": 3.3393599439074473, "grad_norm": 2.2497713565826416, "learning_rate": 1.2464974599455451e-05, "loss": 0.4682, "step": 33340 }, { "epoch": 3.340361596634447, "grad_norm": 2.539682149887085, "learning_rate": 1.245133722339183e-05, "loss": 0.4342, "step": 33350 }, { "epoch": 3.3413632493614465, "grad_norm": 2.3787872791290283, "learning_rate": 1.2437704837203972e-05, "loss": 0.3957, "step": 33360 }, { "epoch": 3.342364902088446, "grad_norm": 2.2526967525482178, "learning_rate": 1.2424077446312696e-05, "loss": 0.4469, "step": 33370 }, { "epoch": 3.3433665548154456, "grad_norm": 2.094153642654419, "learning_rate": 1.2410455056136818e-05, "loss": 0.4025, "step": 33380 }, { "epoch": 3.344368207542445, "grad_norm": 1.881780982017517, "learning_rate": 1.2396837672093175e-05, "loss": 0.4512, "step": 33390 }, { "epoch": 3.345369860269445, "grad_norm": 2.2013063430786133, "learning_rate": 1.2383225299596616e-05, "loss": 0.4346, "step": 33400 }, { "epoch": 3.346371512996444, "grad_norm": 1.754610300064087, "learning_rate": 1.2369617944059983e-05, "loss": 0.3997, "step": 33410 }, { "epoch": 3.3473731657234436, "grad_norm": 2.1179208755493164, "learning_rate": 1.2356015610894139e-05, "loss": 0.4434, "step": 33420 }, { "epoch": 3.348374818450443, "grad_norm": 1.6774661540985107, "learning_rate": 1.2342418305507943e-05, "loss": 0.4495, "step": 33430 }, { "epoch": 3.3493764711774427, "grad_norm": 2.1426808834075928, "learning_rate": 1.2328826033308252e-05, "loss": 0.4384, "step": 33440 }, { "epoch": 3.3503781239044423, "grad_norm": 1.60723876953125, "learning_rate": 1.2315238799699922e-05, "loss": 0.4383, "step": 33450 }, { "epoch": 3.351379776631442, "grad_norm": 2.91386079788208, "learning_rate": 1.2301656610085827e-05, "loss": 0.4487, "step": 33460 }, { "epoch": 3.3523814293584415, "grad_norm": 2.210634708404541, "learning_rate": 1.2288079469866795e-05, "loss": 0.4915, "step": 33470 }, { "epoch": 3.353383082085441, "grad_norm": 2.1685736179351807, "learning_rate": 1.2274507384441666e-05, "loss": 0.4122, "step": 33480 }, { "epoch": 3.3543847348124407, "grad_norm": 1.9470291137695312, "learning_rate": 1.2260940359207299e-05, "loss": 0.446, "step": 33490 }, { "epoch": 3.35538638753944, "grad_norm": 1.978200912475586, "learning_rate": 1.2247378399558488e-05, "loss": 0.4294, "step": 33500 }, { "epoch": 3.3563880402664394, "grad_norm": 1.9848915338516235, "learning_rate": 1.2233821510888033e-05, "loss": 0.5088, "step": 33510 }, { "epoch": 3.357389692993439, "grad_norm": 1.9251346588134766, "learning_rate": 1.2220269698586744e-05, "loss": 0.5011, "step": 33520 }, { "epoch": 3.3583913457204386, "grad_norm": 2.3554296493530273, "learning_rate": 1.2206722968043396e-05, "loss": 0.4617, "step": 33530 }, { "epoch": 3.359392998447438, "grad_norm": 2.23020339012146, "learning_rate": 1.2193181324644712e-05, "loss": 0.4251, "step": 33540 }, { "epoch": 3.360394651174438, "grad_norm": 1.555371642112732, "learning_rate": 1.2179644773775422e-05, "loss": 0.4204, "step": 33550 }, { "epoch": 3.3613963039014374, "grad_norm": 3.28185772895813, "learning_rate": 1.216611332081826e-05, "loss": 0.4155, "step": 33560 }, { "epoch": 3.362397956628437, "grad_norm": 2.456796884536743, "learning_rate": 1.2152586971153865e-05, "loss": 0.4124, "step": 33570 }, { "epoch": 3.3633996093554366, "grad_norm": 3.379307270050049, "learning_rate": 1.2139065730160884e-05, "loss": 0.4497, "step": 33580 }, { "epoch": 3.364401262082436, "grad_norm": 2.1304967403411865, "learning_rate": 1.2125549603215952e-05, "loss": 0.4305, "step": 33590 }, { "epoch": 3.3654029148094358, "grad_norm": 2.1245455741882324, "learning_rate": 1.2112038595693651e-05, "loss": 0.4025, "step": 33600 }, { "epoch": 3.366404567536435, "grad_norm": 1.967885971069336, "learning_rate": 1.209853271296649e-05, "loss": 0.4145, "step": 33610 }, { "epoch": 3.3674062202634345, "grad_norm": 1.7838064432144165, "learning_rate": 1.208503196040501e-05, "loss": 0.3829, "step": 33620 }, { "epoch": 3.368407872990434, "grad_norm": 2.147975444793701, "learning_rate": 1.2071536343377668e-05, "loss": 0.4521, "step": 33630 }, { "epoch": 3.3694095257174337, "grad_norm": 2.331613063812256, "learning_rate": 1.2058045867250884e-05, "loss": 0.4155, "step": 33640 }, { "epoch": 3.3704111784444333, "grad_norm": 2.6464388370513916, "learning_rate": 1.2044560537389044e-05, "loss": 0.4248, "step": 33650 }, { "epoch": 3.371412831171433, "grad_norm": 1.795331597328186, "learning_rate": 1.2031080359154476e-05, "loss": 0.404, "step": 33660 }, { "epoch": 3.3724144838984325, "grad_norm": 2.6335437297821045, "learning_rate": 1.2017605337907472e-05, "loss": 0.4048, "step": 33670 }, { "epoch": 3.373416136625432, "grad_norm": 1.8416485786437988, "learning_rate": 1.2004135479006263e-05, "loss": 0.4342, "step": 33680 }, { "epoch": 3.3744177893524316, "grad_norm": 2.2963712215423584, "learning_rate": 1.1990670787807031e-05, "loss": 0.4062, "step": 33690 }, { "epoch": 3.3754194420794312, "grad_norm": 1.744742751121521, "learning_rate": 1.1977211269663904e-05, "loss": 0.3897, "step": 33700 }, { "epoch": 3.376421094806431, "grad_norm": 1.9296424388885498, "learning_rate": 1.1963756929928952e-05, "loss": 0.4246, "step": 33710 }, { "epoch": 3.37742274753343, "grad_norm": 2.5108537673950195, "learning_rate": 1.1950307773952185e-05, "loss": 0.4156, "step": 33720 }, { "epoch": 3.3784244002604296, "grad_norm": 2.305176258087158, "learning_rate": 1.1936863807081555e-05, "loss": 0.4031, "step": 33730 }, { "epoch": 3.379426052987429, "grad_norm": 2.122509002685547, "learning_rate": 1.1923425034662946e-05, "loss": 0.4498, "step": 33740 }, { "epoch": 3.3804277057144287, "grad_norm": 2.2472517490386963, "learning_rate": 1.1909991462040182e-05, "loss": 0.4545, "step": 33750 }, { "epoch": 3.3814293584414283, "grad_norm": 2.068202495574951, "learning_rate": 1.1896563094555014e-05, "loss": 0.4709, "step": 33760 }, { "epoch": 3.382431011168428, "grad_norm": 2.0109431743621826, "learning_rate": 1.1883139937547122e-05, "loss": 0.4377, "step": 33770 }, { "epoch": 3.3834326638954275, "grad_norm": 2.1727137565612793, "learning_rate": 1.1869721996354124e-05, "loss": 0.4618, "step": 33780 }, { "epoch": 3.384434316622427, "grad_norm": 3.1651906967163086, "learning_rate": 1.1856309276311553e-05, "loss": 0.3812, "step": 33790 }, { "epoch": 3.3854359693494267, "grad_norm": 2.4753785133361816, "learning_rate": 1.1842901782752871e-05, "loss": 0.4006, "step": 33800 }, { "epoch": 3.3864376220764263, "grad_norm": 2.4785983562469482, "learning_rate": 1.1829499521009452e-05, "loss": 0.457, "step": 33810 }, { "epoch": 3.387439274803426, "grad_norm": 1.973423719406128, "learning_rate": 1.1816102496410627e-05, "loss": 0.4479, "step": 33820 }, { "epoch": 3.388440927530425, "grad_norm": 2.3075802326202393, "learning_rate": 1.1802710714283588e-05, "loss": 0.4552, "step": 33830 }, { "epoch": 3.3894425802574246, "grad_norm": 2.3044092655181885, "learning_rate": 1.1789324179953467e-05, "loss": 0.4096, "step": 33840 }, { "epoch": 3.390444232984424, "grad_norm": 2.105095863342285, "learning_rate": 1.1775942898743347e-05, "loss": 0.4348, "step": 33850 }, { "epoch": 3.391445885711424, "grad_norm": 2.2408626079559326, "learning_rate": 1.1762566875974157e-05, "loss": 0.4708, "step": 33860 }, { "epoch": 3.3924475384384234, "grad_norm": 2.347888469696045, "learning_rate": 1.1749196116964764e-05, "loss": 0.4569, "step": 33870 }, { "epoch": 3.393449191165423, "grad_norm": 2.3559656143188477, "learning_rate": 1.1735830627031966e-05, "loss": 0.4546, "step": 33880 }, { "epoch": 3.3944508438924226, "grad_norm": 1.7592754364013672, "learning_rate": 1.1722470411490438e-05, "loss": 0.4055, "step": 33890 }, { "epoch": 3.395452496619422, "grad_norm": 2.887687921524048, "learning_rate": 1.1709115475652751e-05, "loss": 0.4309, "step": 33900 }, { "epoch": 3.3964541493464218, "grad_norm": 2.2861642837524414, "learning_rate": 1.1695765824829386e-05, "loss": 0.4514, "step": 33910 }, { "epoch": 3.397455802073421, "grad_norm": 2.557394504547119, "learning_rate": 1.1682421464328745e-05, "loss": 0.4279, "step": 33920 }, { "epoch": 3.398457454800421, "grad_norm": 2.420703411102295, "learning_rate": 1.1669082399457105e-05, "loss": 0.4231, "step": 33930 }, { "epoch": 3.39945910752742, "grad_norm": 1.7291672229766846, "learning_rate": 1.1655748635518609e-05, "loss": 0.4588, "step": 33940 }, { "epoch": 3.4004607602544197, "grad_norm": 2.1979129314422607, "learning_rate": 1.1642420177815352e-05, "loss": 0.4296, "step": 33950 }, { "epoch": 3.4014624129814193, "grad_norm": 2.324911594390869, "learning_rate": 1.1629097031647293e-05, "loss": 0.488, "step": 33960 }, { "epoch": 3.402464065708419, "grad_norm": 2.866677761077881, "learning_rate": 1.1615779202312241e-05, "loss": 0.4562, "step": 33970 }, { "epoch": 3.4034657184354185, "grad_norm": 1.6479003429412842, "learning_rate": 1.1602466695105956e-05, "loss": 0.4266, "step": 33980 }, { "epoch": 3.404467371162418, "grad_norm": 2.6396446228027344, "learning_rate": 1.158915951532204e-05, "loss": 0.4387, "step": 33990 }, { "epoch": 3.4054690238894176, "grad_norm": 2.2791261672973633, "learning_rate": 1.1575857668251988e-05, "loss": 0.4231, "step": 34000 }, { "epoch": 3.4064706766164172, "grad_norm": 1.8278400897979736, "learning_rate": 1.1562561159185174e-05, "loss": 0.4151, "step": 34010 }, { "epoch": 3.407472329343417, "grad_norm": 2.3506553173065186, "learning_rate": 1.1549269993408846e-05, "loss": 0.4128, "step": 34020 }, { "epoch": 3.408473982070416, "grad_norm": 2.4189066886901855, "learning_rate": 1.153598417620813e-05, "loss": 0.4881, "step": 34030 }, { "epoch": 3.4094756347974156, "grad_norm": 2.4470417499542236, "learning_rate": 1.1522703712866029e-05, "loss": 0.4015, "step": 34040 }, { "epoch": 3.410477287524415, "grad_norm": 1.7285627126693726, "learning_rate": 1.150942860866341e-05, "loss": 0.4424, "step": 34050 }, { "epoch": 3.4114789402514147, "grad_norm": 2.0729176998138428, "learning_rate": 1.1496158868879015e-05, "loss": 0.4989, "step": 34060 }, { "epoch": 3.4124805929784143, "grad_norm": 2.230759382247925, "learning_rate": 1.148289449878945e-05, "loss": 0.4757, "step": 34070 }, { "epoch": 3.413482245705414, "grad_norm": 1.9001071453094482, "learning_rate": 1.1469635503669186e-05, "loss": 0.4198, "step": 34080 }, { "epoch": 3.4144838984324135, "grad_norm": 2.2974631786346436, "learning_rate": 1.1456381888790554e-05, "loss": 0.4217, "step": 34090 }, { "epoch": 3.415485551159413, "grad_norm": 1.4676469564437866, "learning_rate": 1.144313365942375e-05, "loss": 0.4494, "step": 34100 }, { "epoch": 3.4164872038864127, "grad_norm": 2.2653024196624756, "learning_rate": 1.1429890820836829e-05, "loss": 0.4153, "step": 34110 }, { "epoch": 3.4174888566134123, "grad_norm": 1.8923325538635254, "learning_rate": 1.1416653378295697e-05, "loss": 0.4202, "step": 34120 }, { "epoch": 3.418490509340412, "grad_norm": 2.0065672397613525, "learning_rate": 1.1403421337064121e-05, "loss": 0.4641, "step": 34130 }, { "epoch": 3.419492162067411, "grad_norm": 1.9923065900802612, "learning_rate": 1.1390194702403715e-05, "loss": 0.4349, "step": 34140 }, { "epoch": 3.4204938147944106, "grad_norm": 2.1380667686462402, "learning_rate": 1.1376973479573942e-05, "loss": 0.45, "step": 34150 }, { "epoch": 3.42149546752141, "grad_norm": 1.5650336742401123, "learning_rate": 1.136375767383212e-05, "loss": 0.4001, "step": 34160 }, { "epoch": 3.42249712024841, "grad_norm": 1.5955073833465576, "learning_rate": 1.1350547290433399e-05, "loss": 0.4333, "step": 34170 }, { "epoch": 3.4234987729754094, "grad_norm": 1.952041506767273, "learning_rate": 1.133734233463081e-05, "loss": 0.4634, "step": 34180 }, { "epoch": 3.424500425702409, "grad_norm": 2.256084680557251, "learning_rate": 1.1324142811675167e-05, "loss": 0.4104, "step": 34190 }, { "epoch": 3.4255020784294086, "grad_norm": 1.7892006635665894, "learning_rate": 1.1310948726815162e-05, "loss": 0.4574, "step": 34200 }, { "epoch": 3.426503731156408, "grad_norm": 2.5087509155273438, "learning_rate": 1.1297760085297332e-05, "loss": 0.4494, "step": 34210 }, { "epoch": 3.4275053838834078, "grad_norm": 1.9253290891647339, "learning_rate": 1.1284576892366037e-05, "loss": 0.4651, "step": 34220 }, { "epoch": 3.4285070366104073, "grad_norm": 2.0629913806915283, "learning_rate": 1.1271399153263448e-05, "loss": 0.3922, "step": 34230 }, { "epoch": 3.429508689337407, "grad_norm": 2.1463217735290527, "learning_rate": 1.1258226873229589e-05, "loss": 0.3902, "step": 34240 }, { "epoch": 3.430510342064406, "grad_norm": 2.2839205265045166, "learning_rate": 1.124506005750234e-05, "loss": 0.4098, "step": 34250 }, { "epoch": 3.4315119947914057, "grad_norm": 2.252955198287964, "learning_rate": 1.1231898711317352e-05, "loss": 0.441, "step": 34260 }, { "epoch": 3.4325136475184053, "grad_norm": 2.093442678451538, "learning_rate": 1.1218742839908131e-05, "loss": 0.3993, "step": 34270 }, { "epoch": 3.433515300245405, "grad_norm": 2.032712459564209, "learning_rate": 1.1205592448506022e-05, "loss": 0.4433, "step": 34280 }, { "epoch": 3.4345169529724044, "grad_norm": 2.3281428813934326, "learning_rate": 1.1192447542340173e-05, "loss": 0.4208, "step": 34290 }, { "epoch": 3.435518605699404, "grad_norm": 2.4409408569335938, "learning_rate": 1.1179308126637524e-05, "loss": 0.3541, "step": 34300 }, { "epoch": 3.4365202584264036, "grad_norm": 2.3380045890808105, "learning_rate": 1.1166174206622887e-05, "loss": 0.4353, "step": 34310 }, { "epoch": 3.437521911153403, "grad_norm": 1.6690657138824463, "learning_rate": 1.115304578751886e-05, "loss": 0.4292, "step": 34320 }, { "epoch": 3.438523563880403, "grad_norm": 2.014461040496826, "learning_rate": 1.1139922874545824e-05, "loss": 0.487, "step": 34330 }, { "epoch": 3.4395252166074024, "grad_norm": 2.823160409927368, "learning_rate": 1.1126805472922034e-05, "loss": 0.4126, "step": 34340 }, { "epoch": 3.440526869334402, "grad_norm": 2.1313488483428955, "learning_rate": 1.1113693587863507e-05, "loss": 0.4308, "step": 34350 }, { "epoch": 3.441528522061401, "grad_norm": 1.7940298318862915, "learning_rate": 1.1100587224584092e-05, "loss": 0.4637, "step": 34360 }, { "epoch": 3.4425301747884007, "grad_norm": 2.9107887744903564, "learning_rate": 1.1087486388295399e-05, "loss": 0.4597, "step": 34370 }, { "epoch": 3.4435318275154003, "grad_norm": 1.9696946144104004, "learning_rate": 1.10743910842069e-05, "loss": 0.4005, "step": 34380 }, { "epoch": 3.4445334802424, "grad_norm": 1.943562388420105, "learning_rate": 1.106130131752583e-05, "loss": 0.4398, "step": 34390 }, { "epoch": 3.4455351329693995, "grad_norm": 2.523973226547241, "learning_rate": 1.1048217093457228e-05, "loss": 0.4519, "step": 34400 }, { "epoch": 3.446536785696399, "grad_norm": 2.25036883354187, "learning_rate": 1.1035138417203936e-05, "loss": 0.4734, "step": 34410 }, { "epoch": 3.4475384384233987, "grad_norm": 2.8198471069335938, "learning_rate": 1.1022065293966579e-05, "loss": 0.4328, "step": 34420 }, { "epoch": 3.4485400911503983, "grad_norm": 1.8240866661071777, "learning_rate": 1.1008997728943584e-05, "loss": 0.4731, "step": 34430 }, { "epoch": 3.449541743877398, "grad_norm": 2.043656349182129, "learning_rate": 1.0995935727331166e-05, "loss": 0.439, "step": 34440 }, { "epoch": 3.450543396604397, "grad_norm": 2.4668407440185547, "learning_rate": 1.0982879294323323e-05, "loss": 0.4616, "step": 34450 }, { "epoch": 3.451545049331397, "grad_norm": 1.7924633026123047, "learning_rate": 1.0969828435111843e-05, "loss": 0.4282, "step": 34460 }, { "epoch": 3.452546702058396, "grad_norm": 1.8731104135513306, "learning_rate": 1.0956783154886297e-05, "loss": 0.395, "step": 34470 }, { "epoch": 3.453548354785396, "grad_norm": 2.430614948272705, "learning_rate": 1.0943743458834033e-05, "loss": 0.4489, "step": 34480 }, { "epoch": 3.4545500075123954, "grad_norm": 2.1472465991973877, "learning_rate": 1.0930709352140183e-05, "loss": 0.4062, "step": 34490 }, { "epoch": 3.455551660239395, "grad_norm": 2.722684383392334, "learning_rate": 1.0917680839987652e-05, "loss": 0.4683, "step": 34500 }, { "epoch": 3.4565533129663946, "grad_norm": 2.144862174987793, "learning_rate": 1.0904657927557144e-05, "loss": 0.4497, "step": 34510 }, { "epoch": 3.457554965693394, "grad_norm": 2.7352027893066406, "learning_rate": 1.0891640620027094e-05, "loss": 0.4247, "step": 34520 }, { "epoch": 3.4585566184203937, "grad_norm": 2.6462039947509766, "learning_rate": 1.0878628922573727e-05, "loss": 0.4004, "step": 34530 }, { "epoch": 3.4595582711473933, "grad_norm": 2.273921012878418, "learning_rate": 1.0865622840371074e-05, "loss": 0.4683, "step": 34540 }, { "epoch": 3.460559923874393, "grad_norm": 1.9296114444732666, "learning_rate": 1.0852622378590866e-05, "loss": 0.4534, "step": 34550 }, { "epoch": 3.461561576601392, "grad_norm": 2.1889405250549316, "learning_rate": 1.0839627542402644e-05, "loss": 0.3771, "step": 34560 }, { "epoch": 3.4625632293283917, "grad_norm": 2.109031915664673, "learning_rate": 1.0826638336973696e-05, "loss": 0.4224, "step": 34570 }, { "epoch": 3.4635648820553913, "grad_norm": 3.0517172813415527, "learning_rate": 1.08136547674691e-05, "loss": 0.4029, "step": 34580 }, { "epoch": 3.464566534782391, "grad_norm": 2.070493459701538, "learning_rate": 1.0800676839051641e-05, "loss": 0.4262, "step": 34590 }, { "epoch": 3.4655681875093904, "grad_norm": 2.03619384765625, "learning_rate": 1.0787704556881888e-05, "loss": 0.4141, "step": 34600 }, { "epoch": 3.46656984023639, "grad_norm": 2.0710926055908203, "learning_rate": 1.0774737926118198e-05, "loss": 0.4238, "step": 34610 }, { "epoch": 3.4675714929633896, "grad_norm": 2.2858543395996094, "learning_rate": 1.0761776951916616e-05, "loss": 0.3914, "step": 34620 }, { "epoch": 3.468573145690389, "grad_norm": 2.7241454124450684, "learning_rate": 1.0748821639430973e-05, "loss": 0.4691, "step": 34630 }, { "epoch": 3.469574798417389, "grad_norm": 2.1155881881713867, "learning_rate": 1.0735871993812863e-05, "loss": 0.4633, "step": 34640 }, { "epoch": 3.4705764511443884, "grad_norm": 1.7804661989212036, "learning_rate": 1.0722928020211609e-05, "loss": 0.4208, "step": 34650 }, { "epoch": 3.471578103871388, "grad_norm": 1.557573676109314, "learning_rate": 1.070998972377425e-05, "loss": 0.4351, "step": 34660 }, { "epoch": 3.472579756598387, "grad_norm": 2.085498094558716, "learning_rate": 1.0697057109645629e-05, "loss": 0.4311, "step": 34670 }, { "epoch": 3.4735814093253867, "grad_norm": 2.409001111984253, "learning_rate": 1.0684130182968284e-05, "loss": 0.4173, "step": 34680 }, { "epoch": 3.4745830620523863, "grad_norm": 1.4353440999984741, "learning_rate": 1.0671208948882514e-05, "loss": 0.441, "step": 34690 }, { "epoch": 3.475584714779386, "grad_norm": 2.4345576763153076, "learning_rate": 1.0658293412526316e-05, "loss": 0.3898, "step": 34700 }, { "epoch": 3.4765863675063855, "grad_norm": 2.105102777481079, "learning_rate": 1.0645383579035478e-05, "loss": 0.4628, "step": 34710 }, { "epoch": 3.477588020233385, "grad_norm": 2.4883739948272705, "learning_rate": 1.0632479453543498e-05, "loss": 0.4361, "step": 34720 }, { "epoch": 3.4785896729603847, "grad_norm": 1.9684547185897827, "learning_rate": 1.0619581041181565e-05, "loss": 0.4194, "step": 34730 }, { "epoch": 3.4795913256873843, "grad_norm": 1.775962233543396, "learning_rate": 1.0606688347078659e-05, "loss": 0.4438, "step": 34740 }, { "epoch": 3.480592978414384, "grad_norm": 2.144451856613159, "learning_rate": 1.0593801376361448e-05, "loss": 0.41, "step": 34750 }, { "epoch": 3.4815946311413835, "grad_norm": 1.9578640460968018, "learning_rate": 1.0580920134154332e-05, "loss": 0.4127, "step": 34760 }, { "epoch": 3.482596283868383, "grad_norm": 2.3081228733062744, "learning_rate": 1.0568044625579437e-05, "loss": 0.4418, "step": 34770 }, { "epoch": 3.483597936595382, "grad_norm": 2.8557839393615723, "learning_rate": 1.0555174855756605e-05, "loss": 0.4509, "step": 34780 }, { "epoch": 3.484599589322382, "grad_norm": 2.0425710678100586, "learning_rate": 1.0542310829803396e-05, "loss": 0.4134, "step": 34790 }, { "epoch": 3.4856012420493814, "grad_norm": 1.7663625478744507, "learning_rate": 1.052945255283509e-05, "loss": 0.3944, "step": 34800 }, { "epoch": 3.486602894776381, "grad_norm": 2.232553243637085, "learning_rate": 1.0516600029964677e-05, "loss": 0.4513, "step": 34810 }, { "epoch": 3.4876045475033806, "grad_norm": 2.3331985473632812, "learning_rate": 1.0503753266302863e-05, "loss": 0.4078, "step": 34820 }, { "epoch": 3.48860620023038, "grad_norm": 1.9388011693954468, "learning_rate": 1.0490912266958055e-05, "loss": 0.4758, "step": 34830 }, { "epoch": 3.4896078529573797, "grad_norm": 2.1196694374084473, "learning_rate": 1.047807703703638e-05, "loss": 0.4602, "step": 34840 }, { "epoch": 3.4906095056843793, "grad_norm": 1.5808969736099243, "learning_rate": 1.0465247581641663e-05, "loss": 0.3994, "step": 34850 }, { "epoch": 3.491611158411379, "grad_norm": 1.7989665269851685, "learning_rate": 1.0452423905875425e-05, "loss": 0.407, "step": 34860 }, { "epoch": 3.4926128111383785, "grad_norm": 2.2902822494506836, "learning_rate": 1.0439606014836924e-05, "loss": 0.3768, "step": 34870 }, { "epoch": 3.493614463865378, "grad_norm": 2.374018430709839, "learning_rate": 1.042679391362307e-05, "loss": 0.4478, "step": 34880 }, { "epoch": 3.4946161165923773, "grad_norm": 1.874822735786438, "learning_rate": 1.0413987607328497e-05, "loss": 0.451, "step": 34890 }, { "epoch": 3.495617769319377, "grad_norm": 2.655895233154297, "learning_rate": 1.0401187101045534e-05, "loss": 0.4629, "step": 34900 }, { "epoch": 3.4966194220463764, "grad_norm": 2.248385429382324, "learning_rate": 1.03883923998642e-05, "loss": 0.4135, "step": 34910 }, { "epoch": 3.497621074773376, "grad_norm": 2.5209267139434814, "learning_rate": 1.0375603508872203e-05, "loss": 0.4222, "step": 34920 }, { "epoch": 3.4986227275003756, "grad_norm": 1.8577488660812378, "learning_rate": 1.0362820433154938e-05, "loss": 0.4049, "step": 34930 }, { "epoch": 3.499624380227375, "grad_norm": 2.4650468826293945, "learning_rate": 1.0350043177795517e-05, "loss": 0.4289, "step": 34940 }, { "epoch": 3.500626032954375, "grad_norm": 2.639314889907837, "learning_rate": 1.033727174787469e-05, "loss": 0.4502, "step": 34950 }, { "epoch": 3.5016276856813744, "grad_norm": 2.0178632736206055, "learning_rate": 1.0324506148470917e-05, "loss": 0.4272, "step": 34960 }, { "epoch": 3.502629338408374, "grad_norm": 1.9113802909851074, "learning_rate": 1.0311746384660346e-05, "loss": 0.4286, "step": 34970 }, { "epoch": 3.503630991135373, "grad_norm": 2.1833677291870117, "learning_rate": 1.0298992461516802e-05, "loss": 0.501, "step": 34980 }, { "epoch": 3.504632643862373, "grad_norm": 2.2047367095947266, "learning_rate": 1.028624438411175e-05, "loss": 0.4528, "step": 34990 }, { "epoch": 3.5056342965893723, "grad_norm": 1.7950270175933838, "learning_rate": 1.0273502157514394e-05, "loss": 0.4207, "step": 35000 }, { "epoch": 3.506635949316372, "grad_norm": 1.9958217144012451, "learning_rate": 1.0260765786791577e-05, "loss": 0.3915, "step": 35010 }, { "epoch": 3.5076376020433715, "grad_norm": 2.9710733890533447, "learning_rate": 1.0248035277007783e-05, "loss": 0.4248, "step": 35020 }, { "epoch": 3.508639254770371, "grad_norm": 1.977878212928772, "learning_rate": 1.0235310633225232e-05, "loss": 0.4082, "step": 35030 }, { "epoch": 3.5096409074973707, "grad_norm": 2.591295003890991, "learning_rate": 1.0222591860503761e-05, "loss": 0.4369, "step": 35040 }, { "epoch": 3.5106425602243703, "grad_norm": 1.7341768741607666, "learning_rate": 1.0209878963900904e-05, "loss": 0.3957, "step": 35050 }, { "epoch": 3.51164421295137, "grad_norm": 2.3429300785064697, "learning_rate": 1.0197171948471812e-05, "loss": 0.4407, "step": 35060 }, { "epoch": 3.5126458656783695, "grad_norm": 2.3763067722320557, "learning_rate": 1.0184470819269359e-05, "loss": 0.4635, "step": 35070 }, { "epoch": 3.513647518405369, "grad_norm": 2.520565986633301, "learning_rate": 1.0171775581344044e-05, "loss": 0.438, "step": 35080 }, { "epoch": 3.514649171132368, "grad_norm": 2.132378578186035, "learning_rate": 1.0159086239744003e-05, "loss": 0.4628, "step": 35090 }, { "epoch": 3.5156508238593682, "grad_norm": 2.1328794956207275, "learning_rate": 1.0146402799515079e-05, "loss": 0.449, "step": 35100 }, { "epoch": 3.5166524765863674, "grad_norm": 2.2346129417419434, "learning_rate": 1.0133725265700728e-05, "loss": 0.4675, "step": 35110 }, { "epoch": 3.517654129313367, "grad_norm": 2.5284063816070557, "learning_rate": 1.0121053643342074e-05, "loss": 0.4691, "step": 35120 }, { "epoch": 3.5186557820403666, "grad_norm": 2.44215989112854, "learning_rate": 1.0108387937477886e-05, "loss": 0.4336, "step": 35130 }, { "epoch": 3.519657434767366, "grad_norm": 2.0735301971435547, "learning_rate": 1.0095728153144579e-05, "loss": 0.4179, "step": 35140 }, { "epoch": 3.5206590874943657, "grad_norm": 1.7370909452438354, "learning_rate": 1.0083074295376219e-05, "loss": 0.4225, "step": 35150 }, { "epoch": 3.5216607402213653, "grad_norm": 2.455852746963501, "learning_rate": 1.0070426369204511e-05, "loss": 0.4091, "step": 35160 }, { "epoch": 3.522662392948365, "grad_norm": 2.051345109939575, "learning_rate": 1.0057784379658797e-05, "loss": 0.4919, "step": 35170 }, { "epoch": 3.5236640456753645, "grad_norm": 2.2854480743408203, "learning_rate": 1.0045148331766069e-05, "loss": 0.4539, "step": 35180 }, { "epoch": 3.524665698402364, "grad_norm": 2.3247320652008057, "learning_rate": 1.0032518230550947e-05, "loss": 0.4547, "step": 35190 }, { "epoch": 3.5256673511293632, "grad_norm": 2.1312015056610107, "learning_rate": 1.0019894081035694e-05, "loss": 0.4494, "step": 35200 }, { "epoch": 3.526669003856363, "grad_norm": 1.9080195426940918, "learning_rate": 1.00072758882402e-05, "loss": 0.4012, "step": 35210 }, { "epoch": 3.5276706565833624, "grad_norm": 1.9371165037155151, "learning_rate": 9.994663657181991e-06, "loss": 0.4325, "step": 35220 }, { "epoch": 3.528672309310362, "grad_norm": 2.2770767211914062, "learning_rate": 9.982057392876215e-06, "loss": 0.4173, "step": 35230 }, { "epoch": 3.5296739620373616, "grad_norm": 2.719331741333008, "learning_rate": 9.969457100335658e-06, "loss": 0.4215, "step": 35240 }, { "epoch": 3.530675614764361, "grad_norm": 2.2385475635528564, "learning_rate": 9.956862784570726e-06, "loss": 0.4139, "step": 35250 }, { "epoch": 3.531677267491361, "grad_norm": 2.103825330734253, "learning_rate": 9.944274450589433e-06, "loss": 0.4366, "step": 35260 }, { "epoch": 3.5326789202183604, "grad_norm": 2.2733116149902344, "learning_rate": 9.931692103397464e-06, "loss": 0.4787, "step": 35270 }, { "epoch": 3.53368057294536, "grad_norm": 1.8610239028930664, "learning_rate": 9.919115747998059e-06, "loss": 0.4291, "step": 35280 }, { "epoch": 3.534682225672359, "grad_norm": 1.7383731603622437, "learning_rate": 9.906545389392105e-06, "loss": 0.4369, "step": 35290 }, { "epoch": 3.535683878399359, "grad_norm": 3.8377766609191895, "learning_rate": 9.89398103257813e-06, "loss": 0.4582, "step": 35300 }, { "epoch": 3.5366855311263583, "grad_norm": 1.6542972326278687, "learning_rate": 9.88142268255223e-06, "loss": 0.3911, "step": 35310 }, { "epoch": 3.537687183853358, "grad_norm": 1.7561484575271606, "learning_rate": 9.868870344308127e-06, "loss": 0.3694, "step": 35320 }, { "epoch": 3.5386888365803575, "grad_norm": 2.326295852661133, "learning_rate": 9.856324022837177e-06, "loss": 0.42, "step": 35330 }, { "epoch": 3.539690489307357, "grad_norm": 1.6023072004318237, "learning_rate": 9.843783723128328e-06, "loss": 0.4183, "step": 35340 }, { "epoch": 3.5406921420343567, "grad_norm": 2.0997536182403564, "learning_rate": 9.8312494501681e-06, "loss": 0.4093, "step": 35350 }, { "epoch": 3.5416937947613563, "grad_norm": 2.6246097087860107, "learning_rate": 9.818721208940674e-06, "loss": 0.4217, "step": 35360 }, { "epoch": 3.542695447488356, "grad_norm": 2.3198230266571045, "learning_rate": 9.806199004427807e-06, "loss": 0.4572, "step": 35370 }, { "epoch": 3.5436971002153554, "grad_norm": 2.1230669021606445, "learning_rate": 9.793682841608836e-06, "loss": 0.4245, "step": 35380 }, { "epoch": 3.544698752942355, "grad_norm": 2.395850896835327, "learning_rate": 9.781172725460711e-06, "loss": 0.4536, "step": 35390 }, { "epoch": 3.545700405669354, "grad_norm": 2.717364549636841, "learning_rate": 9.768668660957999e-06, "loss": 0.4774, "step": 35400 }, { "epoch": 3.546702058396354, "grad_norm": 2.4505763053894043, "learning_rate": 9.756170653072844e-06, "loss": 0.4056, "step": 35410 }, { "epoch": 3.5477037111233534, "grad_norm": 3.048994779586792, "learning_rate": 9.74367870677495e-06, "loss": 0.4456, "step": 35420 }, { "epoch": 3.548705363850353, "grad_norm": 2.351593017578125, "learning_rate": 9.731192827031666e-06, "loss": 0.4442, "step": 35430 }, { "epoch": 3.5497070165773525, "grad_norm": 1.9275217056274414, "learning_rate": 9.718713018807895e-06, "loss": 0.4316, "step": 35440 }, { "epoch": 3.550708669304352, "grad_norm": 2.4288742542266846, "learning_rate": 9.706239287066132e-06, "loss": 0.4615, "step": 35450 }, { "epoch": 3.5517103220313517, "grad_norm": 2.2295823097229004, "learning_rate": 9.69377163676646e-06, "loss": 0.4165, "step": 35460 }, { "epoch": 3.5527119747583513, "grad_norm": 2.53344464302063, "learning_rate": 9.68131007286654e-06, "loss": 0.483, "step": 35470 }, { "epoch": 3.553713627485351, "grad_norm": 2.220214366912842, "learning_rate": 9.668854600321612e-06, "loss": 0.4092, "step": 35480 }, { "epoch": 3.5547152802123505, "grad_norm": 2.255557060241699, "learning_rate": 9.656405224084494e-06, "loss": 0.4361, "step": 35490 }, { "epoch": 3.55571693293935, "grad_norm": 2.5812127590179443, "learning_rate": 9.643961949105584e-06, "loss": 0.4721, "step": 35500 }, { "epoch": 3.5567185856663492, "grad_norm": 1.8932257890701294, "learning_rate": 9.631524780332851e-06, "loss": 0.4126, "step": 35510 }, { "epoch": 3.5577202383933493, "grad_norm": 2.0898537635803223, "learning_rate": 9.619093722711833e-06, "loss": 0.4027, "step": 35520 }, { "epoch": 3.5587218911203484, "grad_norm": 2.0887091159820557, "learning_rate": 9.606668781185646e-06, "loss": 0.4123, "step": 35530 }, { "epoch": 3.559723543847348, "grad_norm": 2.2083592414855957, "learning_rate": 9.594249960694965e-06, "loss": 0.4965, "step": 35540 }, { "epoch": 3.5607251965743476, "grad_norm": 2.0207650661468506, "learning_rate": 9.58183726617804e-06, "loss": 0.4269, "step": 35550 }, { "epoch": 3.561726849301347, "grad_norm": 1.9305944442749023, "learning_rate": 9.569430702570672e-06, "loss": 0.4924, "step": 35560 }, { "epoch": 3.562728502028347, "grad_norm": 2.5445716381073, "learning_rate": 9.557030274806239e-06, "loss": 0.3865, "step": 35570 }, { "epoch": 3.5637301547553464, "grad_norm": 2.159574031829834, "learning_rate": 9.54463598781567e-06, "loss": 0.4323, "step": 35580 }, { "epoch": 3.564731807482346, "grad_norm": 2.1673364639282227, "learning_rate": 9.532247846527453e-06, "loss": 0.4703, "step": 35590 }, { "epoch": 3.5657334602093456, "grad_norm": 2.177016496658325, "learning_rate": 9.519865855867637e-06, "loss": 0.4123, "step": 35600 }, { "epoch": 3.566735112936345, "grad_norm": 1.9912105798721313, "learning_rate": 9.507490020759818e-06, "loss": 0.4372, "step": 35610 }, { "epoch": 3.5677367656633443, "grad_norm": 1.8226884603500366, "learning_rate": 9.495120346125145e-06, "loss": 0.4321, "step": 35620 }, { "epoch": 3.5687384183903443, "grad_norm": 1.9442039728164673, "learning_rate": 9.48275683688234e-06, "loss": 0.3922, "step": 35630 }, { "epoch": 3.5697400711173435, "grad_norm": 1.86778724193573, "learning_rate": 9.470399497947633e-06, "loss": 0.4015, "step": 35640 }, { "epoch": 3.570741723844343, "grad_norm": 2.015185832977295, "learning_rate": 9.458048334234818e-06, "loss": 0.4101, "step": 35650 }, { "epoch": 3.5717433765713427, "grad_norm": 2.0542356967926025, "learning_rate": 9.445703350655264e-06, "loss": 0.4151, "step": 35660 }, { "epoch": 3.5727450292983423, "grad_norm": 2.4986319541931152, "learning_rate": 9.433364552117829e-06, "loss": 0.4189, "step": 35670 }, { "epoch": 3.573746682025342, "grad_norm": 2.3909826278686523, "learning_rate": 9.421031943528941e-06, "loss": 0.4168, "step": 35680 }, { "epoch": 3.5747483347523414, "grad_norm": 2.0575599670410156, "learning_rate": 9.408705529792577e-06, "loss": 0.4215, "step": 35690 }, { "epoch": 3.575749987479341, "grad_norm": 2.2897729873657227, "learning_rate": 9.396385315810242e-06, "loss": 0.4642, "step": 35700 }, { "epoch": 3.5767516402063406, "grad_norm": 2.1848273277282715, "learning_rate": 9.384071306480947e-06, "loss": 0.4217, "step": 35710 }, { "epoch": 3.57775329293334, "grad_norm": 2.4846177101135254, "learning_rate": 9.371763506701265e-06, "loss": 0.423, "step": 35720 }, { "epoch": 3.5787549456603394, "grad_norm": 1.7969287633895874, "learning_rate": 9.359461921365307e-06, "loss": 0.4192, "step": 35730 }, { "epoch": 3.579756598387339, "grad_norm": 2.2867469787597656, "learning_rate": 9.347166555364704e-06, "loss": 0.461, "step": 35740 }, { "epoch": 3.5807582511143385, "grad_norm": 2.5535647869110107, "learning_rate": 9.334877413588583e-06, "loss": 0.4299, "step": 35750 }, { "epoch": 3.581759903841338, "grad_norm": 1.96112859249115, "learning_rate": 9.322594500923653e-06, "loss": 0.4267, "step": 35760 }, { "epoch": 3.5827615565683377, "grad_norm": 3.1883227825164795, "learning_rate": 9.31031782225411e-06, "loss": 0.4415, "step": 35770 }, { "epoch": 3.5837632092953373, "grad_norm": 2.1661252975463867, "learning_rate": 9.298047382461656e-06, "loss": 0.46, "step": 35780 }, { "epoch": 3.584764862022337, "grad_norm": 2.134152412414551, "learning_rate": 9.285783186425559e-06, "loss": 0.3904, "step": 35790 }, { "epoch": 3.5857665147493365, "grad_norm": 2.428616523742676, "learning_rate": 9.273525239022567e-06, "loss": 0.4525, "step": 35800 }, { "epoch": 3.586768167476336, "grad_norm": 2.4641940593719482, "learning_rate": 9.26127354512696e-06, "loss": 0.4485, "step": 35810 }, { "epoch": 3.5877698202033352, "grad_norm": 2.321436643600464, "learning_rate": 9.249028109610524e-06, "loss": 0.4743, "step": 35820 }, { "epoch": 3.5887714729303353, "grad_norm": 2.3309600353240967, "learning_rate": 9.236788937342558e-06, "loss": 0.4444, "step": 35830 }, { "epoch": 3.5897731256573344, "grad_norm": 1.9578757286071777, "learning_rate": 9.22455603318987e-06, "loss": 0.4182, "step": 35840 }, { "epoch": 3.590774778384334, "grad_norm": 1.8859965801239014, "learning_rate": 9.212329402016784e-06, "loss": 0.3995, "step": 35850 }, { "epoch": 3.5917764311113336, "grad_norm": 2.524366855621338, "learning_rate": 9.200109048685112e-06, "loss": 0.4419, "step": 35860 }, { "epoch": 3.592778083838333, "grad_norm": 2.6382808685302734, "learning_rate": 9.18789497805419e-06, "loss": 0.3921, "step": 35870 }, { "epoch": 3.593779736565333, "grad_norm": 2.853111743927002, "learning_rate": 9.175687194980839e-06, "loss": 0.459, "step": 35880 }, { "epoch": 3.5947813892923324, "grad_norm": 2.2526397705078125, "learning_rate": 9.163485704319392e-06, "loss": 0.4678, "step": 35890 }, { "epoch": 3.595783042019332, "grad_norm": 1.7041195631027222, "learning_rate": 9.15129051092167e-06, "loss": 0.4143, "step": 35900 }, { "epoch": 3.5967846947463316, "grad_norm": 1.7411357164382935, "learning_rate": 9.139101619636995e-06, "loss": 0.441, "step": 35910 }, { "epoch": 3.597786347473331, "grad_norm": 1.8171757459640503, "learning_rate": 9.126919035312186e-06, "loss": 0.4283, "step": 35920 }, { "epoch": 3.5987880002003303, "grad_norm": 2.0983874797821045, "learning_rate": 9.114742762791547e-06, "loss": 0.4392, "step": 35930 }, { "epoch": 3.5997896529273303, "grad_norm": 2.409411907196045, "learning_rate": 9.102572806916876e-06, "loss": 0.4293, "step": 35940 }, { "epoch": 3.6007913056543295, "grad_norm": 1.5346509218215942, "learning_rate": 9.090409172527462e-06, "loss": 0.3898, "step": 35950 }, { "epoch": 3.601792958381329, "grad_norm": 1.5412535667419434, "learning_rate": 9.078251864460074e-06, "loss": 0.3976, "step": 35960 }, { "epoch": 3.6027946111083287, "grad_norm": 1.62209153175354, "learning_rate": 9.066100887548972e-06, "loss": 0.4361, "step": 35970 }, { "epoch": 3.6037962638353283, "grad_norm": 2.0511932373046875, "learning_rate": 9.053956246625884e-06, "loss": 0.4037, "step": 35980 }, { "epoch": 3.604797916562328, "grad_norm": 1.7861130237579346, "learning_rate": 9.041817946520054e-06, "loss": 0.4244, "step": 35990 }, { "epoch": 3.6057995692893274, "grad_norm": 1.4258286952972412, "learning_rate": 9.029685992058159e-06, "loss": 0.4323, "step": 36000 }, { "epoch": 3.606801222016327, "grad_norm": 1.9314757585525513, "learning_rate": 9.01756038806437e-06, "loss": 0.4408, "step": 36010 }, { "epoch": 3.6078028747433266, "grad_norm": 1.7380179166793823, "learning_rate": 9.005441139360358e-06, "loss": 0.3859, "step": 36020 }, { "epoch": 3.608804527470326, "grad_norm": 2.3640384674072266, "learning_rate": 8.99332825076524e-06, "loss": 0.4027, "step": 36030 }, { "epoch": 3.6098061801973254, "grad_norm": 3.133868455886841, "learning_rate": 8.981221727095598e-06, "loss": 0.4192, "step": 36040 }, { "epoch": 3.6108078329243254, "grad_norm": 1.5783082246780396, "learning_rate": 8.969121573165492e-06, "loss": 0.4265, "step": 36050 }, { "epoch": 3.6118094856513245, "grad_norm": 1.7218824625015259, "learning_rate": 8.957027793786477e-06, "loss": 0.4918, "step": 36060 }, { "epoch": 3.612811138378324, "grad_norm": 2.190296173095703, "learning_rate": 8.944940393767524e-06, "loss": 0.4599, "step": 36070 }, { "epoch": 3.6138127911053237, "grad_norm": 1.9028578996658325, "learning_rate": 8.93285937791509e-06, "loss": 0.3851, "step": 36080 }, { "epoch": 3.6148144438323233, "grad_norm": 2.483093500137329, "learning_rate": 8.920784751033115e-06, "loss": 0.5199, "step": 36090 }, { "epoch": 3.615816096559323, "grad_norm": 2.389422655105591, "learning_rate": 8.908716517922972e-06, "loss": 0.4206, "step": 36100 }, { "epoch": 3.6168177492863225, "grad_norm": 2.4605555534362793, "learning_rate": 8.89665468338348e-06, "loss": 0.3872, "step": 36110 }, { "epoch": 3.617819402013322, "grad_norm": 2.5835015773773193, "learning_rate": 8.884599252210948e-06, "loss": 0.422, "step": 36120 }, { "epoch": 3.6188210547403217, "grad_norm": 2.2940011024475098, "learning_rate": 8.872550229199128e-06, "loss": 0.4322, "step": 36130 }, { "epoch": 3.6198227074673213, "grad_norm": 2.135439872741699, "learning_rate": 8.860507619139194e-06, "loss": 0.3873, "step": 36140 }, { "epoch": 3.6208243601943204, "grad_norm": 2.894531488418579, "learning_rate": 8.848471426819813e-06, "loss": 0.4451, "step": 36150 }, { "epoch": 3.6218260129213204, "grad_norm": 2.076733350753784, "learning_rate": 8.836441657027078e-06, "loss": 0.4245, "step": 36160 }, { "epoch": 3.6228276656483196, "grad_norm": 2.015641689300537, "learning_rate": 8.82441831454453e-06, "loss": 0.484, "step": 36170 }, { "epoch": 3.623829318375319, "grad_norm": 2.4164772033691406, "learning_rate": 8.812401404153153e-06, "loss": 0.4671, "step": 36180 }, { "epoch": 3.6248309711023188, "grad_norm": 2.1226491928100586, "learning_rate": 8.80039093063138e-06, "loss": 0.3972, "step": 36190 }, { "epoch": 3.6258326238293184, "grad_norm": 2.2570536136627197, "learning_rate": 8.788386898755075e-06, "loss": 0.4411, "step": 36200 }, { "epoch": 3.626834276556318, "grad_norm": 2.020395040512085, "learning_rate": 8.776389313297551e-06, "loss": 0.4889, "step": 36210 }, { "epoch": 3.6278359292833176, "grad_norm": 2.354189157485962, "learning_rate": 8.764398179029546e-06, "loss": 0.4833, "step": 36220 }, { "epoch": 3.628837582010317, "grad_norm": 1.986047625541687, "learning_rate": 8.752413500719242e-06, "loss": 0.3904, "step": 36230 }, { "epoch": 3.6298392347373167, "grad_norm": 1.9563875198364258, "learning_rate": 8.740435283132253e-06, "loss": 0.4306, "step": 36240 }, { "epoch": 3.6308408874643163, "grad_norm": 2.950375556945801, "learning_rate": 8.72846353103162e-06, "loss": 0.4219, "step": 36250 }, { "epoch": 3.6318425401913155, "grad_norm": 2.0507826805114746, "learning_rate": 8.716498249177814e-06, "loss": 0.4712, "step": 36260 }, { "epoch": 3.632844192918315, "grad_norm": 2.2555625438690186, "learning_rate": 8.704539442328736e-06, "loss": 0.4081, "step": 36270 }, { "epoch": 3.6338458456453147, "grad_norm": 1.7647645473480225, "learning_rate": 8.692587115239706e-06, "loss": 0.425, "step": 36280 }, { "epoch": 3.6348474983723142, "grad_norm": 2.237157106399536, "learning_rate": 8.680641272663479e-06, "loss": 0.3712, "step": 36290 }, { "epoch": 3.635849151099314, "grad_norm": 1.650541067123413, "learning_rate": 8.668701919350216e-06, "loss": 0.4203, "step": 36300 }, { "epoch": 3.6368508038263134, "grad_norm": 2.149442672729492, "learning_rate": 8.656769060047504e-06, "loss": 0.3975, "step": 36310 }, { "epoch": 3.637852456553313, "grad_norm": 1.5350333452224731, "learning_rate": 8.64484269950037e-06, "loss": 0.3873, "step": 36320 }, { "epoch": 3.6388541092803126, "grad_norm": 2.2094063758850098, "learning_rate": 8.632922842451212e-06, "loss": 0.3724, "step": 36330 }, { "epoch": 3.639855762007312, "grad_norm": 2.3353826999664307, "learning_rate": 8.621009493639867e-06, "loss": 0.4148, "step": 36340 }, { "epoch": 3.6408574147343113, "grad_norm": 2.6725997924804688, "learning_rate": 8.60910265780361e-06, "loss": 0.4197, "step": 36350 }, { "epoch": 3.6418590674613114, "grad_norm": 2.070082664489746, "learning_rate": 8.597202339677071e-06, "loss": 0.3991, "step": 36360 }, { "epoch": 3.6428607201883105, "grad_norm": 2.181931972503662, "learning_rate": 8.585308543992329e-06, "loss": 0.4527, "step": 36370 }, { "epoch": 3.64386237291531, "grad_norm": 1.89093017578125, "learning_rate": 8.573421275478844e-06, "loss": 0.4001, "step": 36380 }, { "epoch": 3.6448640256423097, "grad_norm": 3.096177101135254, "learning_rate": 8.561540538863522e-06, "loss": 0.4488, "step": 36390 }, { "epoch": 3.6458656783693093, "grad_norm": 2.1697757244110107, "learning_rate": 8.549666338870619e-06, "loss": 0.4258, "step": 36400 }, { "epoch": 3.646867331096309, "grad_norm": 2.5357141494750977, "learning_rate": 8.537798680221808e-06, "loss": 0.4039, "step": 36410 }, { "epoch": 3.6478689838233085, "grad_norm": 2.1486408710479736, "learning_rate": 8.525937567636208e-06, "loss": 0.4074, "step": 36420 }, { "epoch": 3.648870636550308, "grad_norm": 2.1422061920166016, "learning_rate": 8.514083005830259e-06, "loss": 0.449, "step": 36430 }, { "epoch": 3.6498722892773077, "grad_norm": 2.3769538402557373, "learning_rate": 8.502234999517839e-06, "loss": 0.4385, "step": 36440 }, { "epoch": 3.6508739420043073, "grad_norm": 2.6000874042510986, "learning_rate": 8.490393553410225e-06, "loss": 0.4101, "step": 36450 }, { "epoch": 3.6518755947313064, "grad_norm": 2.888254165649414, "learning_rate": 8.478558672216078e-06, "loss": 0.4509, "step": 36460 }, { "epoch": 3.6528772474583064, "grad_norm": 1.998104214668274, "learning_rate": 8.46673036064142e-06, "loss": 0.3462, "step": 36470 }, { "epoch": 3.6538789001853056, "grad_norm": 1.9908857345581055, "learning_rate": 8.454908623389706e-06, "loss": 0.4169, "step": 36480 }, { "epoch": 3.654880552912305, "grad_norm": 2.254089593887329, "learning_rate": 8.44309346516175e-06, "loss": 0.4894, "step": 36490 }, { "epoch": 3.6558822056393048, "grad_norm": 2.5244266986846924, "learning_rate": 8.431284890655752e-06, "loss": 0.5155, "step": 36500 }, { "epoch": 3.6568838583663044, "grad_norm": 2.26857328414917, "learning_rate": 8.419482904567305e-06, "loss": 0.4682, "step": 36510 }, { "epoch": 3.657885511093304, "grad_norm": 3.812756299972534, "learning_rate": 8.407687511589366e-06, "loss": 0.4426, "step": 36520 }, { "epoch": 3.6588871638203035, "grad_norm": 2.134424924850464, "learning_rate": 8.395898716412293e-06, "loss": 0.4639, "step": 36530 }, { "epoch": 3.659888816547303, "grad_norm": 1.799182653427124, "learning_rate": 8.384116523723778e-06, "loss": 0.4578, "step": 36540 }, { "epoch": 3.6608904692743027, "grad_norm": 2.2345874309539795, "learning_rate": 8.372340938208948e-06, "loss": 0.4465, "step": 36550 }, { "epoch": 3.6618921220013023, "grad_norm": 1.7349672317504883, "learning_rate": 8.36057196455025e-06, "loss": 0.445, "step": 36560 }, { "epoch": 3.6628937747283015, "grad_norm": 2.1134767532348633, "learning_rate": 8.348809607427535e-06, "loss": 0.4594, "step": 36570 }, { "epoch": 3.6638954274553015, "grad_norm": 2.5449306964874268, "learning_rate": 8.337053871518005e-06, "loss": 0.4286, "step": 36580 }, { "epoch": 3.6648970801823006, "grad_norm": 2.225423812866211, "learning_rate": 8.325304761496234e-06, "loss": 0.4111, "step": 36590 }, { "epoch": 3.6658987329093002, "grad_norm": 1.876946210861206, "learning_rate": 8.313562282034162e-06, "loss": 0.419, "step": 36600 }, { "epoch": 3.6669003856363, "grad_norm": 2.1918551921844482, "learning_rate": 8.301826437801097e-06, "loss": 0.4192, "step": 36610 }, { "epoch": 3.6679020383632994, "grad_norm": 2.6870341300964355, "learning_rate": 8.290097233463697e-06, "loss": 0.386, "step": 36620 }, { "epoch": 3.668903691090299, "grad_norm": 1.8027386665344238, "learning_rate": 8.278374673685993e-06, "loss": 0.4481, "step": 36630 }, { "epoch": 3.6699053438172986, "grad_norm": 1.7095636129379272, "learning_rate": 8.266658763129362e-06, "loss": 0.4075, "step": 36640 }, { "epoch": 3.670906996544298, "grad_norm": 1.323539137840271, "learning_rate": 8.254949506452548e-06, "loss": 0.4143, "step": 36650 }, { "epoch": 3.671908649271298, "grad_norm": 2.6137101650238037, "learning_rate": 8.243246908311639e-06, "loss": 0.4067, "step": 36660 }, { "epoch": 3.6729103019982974, "grad_norm": 2.2960166931152344, "learning_rate": 8.231550973360072e-06, "loss": 0.4166, "step": 36670 }, { "epoch": 3.6739119547252965, "grad_norm": 1.6058300733566284, "learning_rate": 8.219861706248672e-06, "loss": 0.4128, "step": 36680 }, { "epoch": 3.6749136074522966, "grad_norm": 1.9703116416931152, "learning_rate": 8.208179111625552e-06, "loss": 0.4412, "step": 36690 }, { "epoch": 3.6759152601792957, "grad_norm": 2.711176633834839, "learning_rate": 8.196503194136213e-06, "loss": 0.4466, "step": 36700 }, { "epoch": 3.6769169129062953, "grad_norm": 2.1494076251983643, "learning_rate": 8.184833958423494e-06, "loss": 0.4272, "step": 36710 }, { "epoch": 3.677918565633295, "grad_norm": 2.330552339553833, "learning_rate": 8.173171409127566e-06, "loss": 0.4899, "step": 36720 }, { "epoch": 3.6789202183602945, "grad_norm": 2.2153825759887695, "learning_rate": 8.161515550885959e-06, "loss": 0.3723, "step": 36730 }, { "epoch": 3.679921871087294, "grad_norm": 1.7207276821136475, "learning_rate": 8.149866388333515e-06, "loss": 0.397, "step": 36740 }, { "epoch": 3.6809235238142937, "grad_norm": 2.033440351486206, "learning_rate": 8.138223926102462e-06, "loss": 0.4444, "step": 36750 }, { "epoch": 3.6819251765412933, "grad_norm": 2.009399175643921, "learning_rate": 8.126588168822303e-06, "loss": 0.4054, "step": 36760 }, { "epoch": 3.682926829268293, "grad_norm": 1.952618956565857, "learning_rate": 8.114959121119903e-06, "loss": 0.3859, "step": 36770 }, { "epoch": 3.6839284819952924, "grad_norm": 1.7253168821334839, "learning_rate": 8.10333678761948e-06, "loss": 0.3954, "step": 36780 }, { "epoch": 3.6849301347222916, "grad_norm": 1.8752772808074951, "learning_rate": 8.091721172942566e-06, "loss": 0.4315, "step": 36790 }, { "epoch": 3.685931787449291, "grad_norm": 1.8806124925613403, "learning_rate": 8.080112281707985e-06, "loss": 0.4244, "step": 36800 }, { "epoch": 3.6869334401762908, "grad_norm": 2.6830246448516846, "learning_rate": 8.068510118531949e-06, "loss": 0.4661, "step": 36810 }, { "epoch": 3.6879350929032904, "grad_norm": 2.2413887977600098, "learning_rate": 8.056914688027964e-06, "loss": 0.4661, "step": 36820 }, { "epoch": 3.68893674563029, "grad_norm": 2.0021140575408936, "learning_rate": 8.045325994806838e-06, "loss": 0.4098, "step": 36830 }, { "epoch": 3.6899383983572895, "grad_norm": 1.5996559858322144, "learning_rate": 8.033744043476749e-06, "loss": 0.4021, "step": 36840 }, { "epoch": 3.690940051084289, "grad_norm": 3.1006476879119873, "learning_rate": 8.022168838643152e-06, "loss": 0.4296, "step": 36850 }, { "epoch": 3.6919417038112887, "grad_norm": 2.16410231590271, "learning_rate": 8.010600384908848e-06, "loss": 0.4847, "step": 36860 }, { "epoch": 3.6929433565382883, "grad_norm": 2.522000312805176, "learning_rate": 7.99903868687392e-06, "loss": 0.4312, "step": 36870 }, { "epoch": 3.6939450092652875, "grad_norm": 1.8807637691497803, "learning_rate": 7.9874837491358e-06, "loss": 0.5074, "step": 36880 }, { "epoch": 3.6949466619922875, "grad_norm": 1.5786900520324707, "learning_rate": 7.975935576289218e-06, "loss": 0.3864, "step": 36890 }, { "epoch": 3.6959483147192866, "grad_norm": 2.4154186248779297, "learning_rate": 7.964394172926206e-06, "loss": 0.4542, "step": 36900 }, { "epoch": 3.6969499674462862, "grad_norm": 1.6693038940429688, "learning_rate": 7.952859543636116e-06, "loss": 0.4141, "step": 36910 }, { "epoch": 3.697951620173286, "grad_norm": 2.0245635509490967, "learning_rate": 7.941331693005599e-06, "loss": 0.4179, "step": 36920 }, { "epoch": 3.6989532729002854, "grad_norm": 1.8546513319015503, "learning_rate": 7.929810625618612e-06, "loss": 0.4161, "step": 36930 }, { "epoch": 3.699954925627285, "grad_norm": 1.8944015502929688, "learning_rate": 7.91829634605642e-06, "loss": 0.432, "step": 36940 }, { "epoch": 3.7009565783542846, "grad_norm": 2.0617423057556152, "learning_rate": 7.906788858897579e-06, "loss": 0.4466, "step": 36950 }, { "epoch": 3.701958231081284, "grad_norm": 2.174147129058838, "learning_rate": 7.895288168717951e-06, "loss": 0.4087, "step": 36960 }, { "epoch": 3.702959883808284, "grad_norm": 2.1996078491210938, "learning_rate": 7.883794280090698e-06, "loss": 0.3939, "step": 36970 }, { "epoch": 3.7039615365352834, "grad_norm": 1.9552979469299316, "learning_rate": 7.872307197586271e-06, "loss": 0.4228, "step": 36980 }, { "epoch": 3.7049631892622825, "grad_norm": 2.0555949211120605, "learning_rate": 7.860826925772414e-06, "loss": 0.4539, "step": 36990 }, { "epoch": 3.7059648419892826, "grad_norm": 2.5038888454437256, "learning_rate": 7.849353469214165e-06, "loss": 0.4057, "step": 37000 }, { "epoch": 3.7069664947162817, "grad_norm": 2.1329166889190674, "learning_rate": 7.837886832473859e-06, "loss": 0.4751, "step": 37010 }, { "epoch": 3.7079681474432813, "grad_norm": 2.378232479095459, "learning_rate": 7.826427020111107e-06, "loss": 0.4125, "step": 37020 }, { "epoch": 3.708969800170281, "grad_norm": 2.8863255977630615, "learning_rate": 7.814974036682814e-06, "loss": 0.4706, "step": 37030 }, { "epoch": 3.7099714528972805, "grad_norm": 1.9137957096099854, "learning_rate": 7.80352788674317e-06, "loss": 0.4149, "step": 37040 }, { "epoch": 3.71097310562428, "grad_norm": 1.9049749374389648, "learning_rate": 7.792088574843643e-06, "loss": 0.3909, "step": 37050 }, { "epoch": 3.7119747583512797, "grad_norm": 2.699704647064209, "learning_rate": 7.780656105532983e-06, "loss": 0.4085, "step": 37060 }, { "epoch": 3.7129764110782792, "grad_norm": 2.358980178833008, "learning_rate": 7.769230483357212e-06, "loss": 0.4699, "step": 37070 }, { "epoch": 3.713978063805279, "grad_norm": 2.233934164047241, "learning_rate": 7.757811712859665e-06, "loss": 0.4224, "step": 37080 }, { "epoch": 3.7149797165322784, "grad_norm": 2.1187474727630615, "learning_rate": 7.7463997985809e-06, "loss": 0.4722, "step": 37090 }, { "epoch": 3.7159813692592776, "grad_norm": 2.188882350921631, "learning_rate": 7.734994745058771e-06, "loss": 0.4373, "step": 37100 }, { "epoch": 3.7169830219862776, "grad_norm": 2.354186773300171, "learning_rate": 7.723596556828434e-06, "loss": 0.4367, "step": 37110 }, { "epoch": 3.7179846747132768, "grad_norm": 2.162371873855591, "learning_rate": 7.71220523842226e-06, "loss": 0.3975, "step": 37120 }, { "epoch": 3.7189863274402764, "grad_norm": 2.0304088592529297, "learning_rate": 7.700820794369923e-06, "loss": 0.4807, "step": 37130 }, { "epoch": 3.719987980167276, "grad_norm": 1.8520346879959106, "learning_rate": 7.689443229198365e-06, "loss": 0.4204, "step": 37140 }, { "epoch": 3.7209896328942755, "grad_norm": 1.8495360612869263, "learning_rate": 7.678072547431787e-06, "loss": 0.4181, "step": 37150 }, { "epoch": 3.721991285621275, "grad_norm": 2.0318777561187744, "learning_rate": 7.666708753591626e-06, "loss": 0.4456, "step": 37160 }, { "epoch": 3.7229929383482747, "grad_norm": 2.1289327144622803, "learning_rate": 7.655351852196627e-06, "loss": 0.434, "step": 37170 }, { "epoch": 3.7239945910752743, "grad_norm": 2.1364686489105225, "learning_rate": 7.644001847762774e-06, "loss": 0.4215, "step": 37180 }, { "epoch": 3.724996243802274, "grad_norm": 2.0084264278411865, "learning_rate": 7.63265874480329e-06, "loss": 0.3936, "step": 37190 }, { "epoch": 3.7259978965292735, "grad_norm": 1.8063205480575562, "learning_rate": 7.621322547828663e-06, "loss": 0.3598, "step": 37200 }, { "epoch": 3.7269995492562726, "grad_norm": 2.089787006378174, "learning_rate": 7.609993261346668e-06, "loss": 0.4059, "step": 37210 }, { "epoch": 3.7280012019832727, "grad_norm": 1.835331678390503, "learning_rate": 7.598670889862297e-06, "loss": 0.4411, "step": 37220 }, { "epoch": 3.729002854710272, "grad_norm": 2.2048423290252686, "learning_rate": 7.587355437877777e-06, "loss": 0.4427, "step": 37230 }, { "epoch": 3.7300045074372714, "grad_norm": 2.705153703689575, "learning_rate": 7.576046909892637e-06, "loss": 0.4109, "step": 37240 }, { "epoch": 3.731006160164271, "grad_norm": 1.9492063522338867, "learning_rate": 7.564745310403612e-06, "loss": 0.3932, "step": 37250 }, { "epoch": 3.7320078128912706, "grad_norm": 2.270702600479126, "learning_rate": 7.553450643904692e-06, "loss": 0.3898, "step": 37260 }, { "epoch": 3.73300946561827, "grad_norm": 1.9737622737884521, "learning_rate": 7.542162914887111e-06, "loss": 0.4094, "step": 37270 }, { "epoch": 3.7340111183452698, "grad_norm": 2.8277175426483154, "learning_rate": 7.530882127839348e-06, "loss": 0.3824, "step": 37280 }, { "epoch": 3.7350127710722694, "grad_norm": 2.938880205154419, "learning_rate": 7.519608287247113e-06, "loss": 0.4285, "step": 37290 }, { "epoch": 3.736014423799269, "grad_norm": 2.2028396129608154, "learning_rate": 7.508341397593363e-06, "loss": 0.3687, "step": 37300 }, { "epoch": 3.7370160765262685, "grad_norm": 2.1324620246887207, "learning_rate": 7.497081463358286e-06, "loss": 0.4436, "step": 37310 }, { "epoch": 3.7380177292532677, "grad_norm": 2.0066702365875244, "learning_rate": 7.485828489019303e-06, "loss": 0.369, "step": 37320 }, { "epoch": 3.7390193819802673, "grad_norm": 2.5639231204986572, "learning_rate": 7.47458247905107e-06, "loss": 0.4182, "step": 37330 }, { "epoch": 3.740021034707267, "grad_norm": 2.4227943420410156, "learning_rate": 7.463343437925477e-06, "loss": 0.4117, "step": 37340 }, { "epoch": 3.7410226874342665, "grad_norm": 1.719071626663208, "learning_rate": 7.452111370111636e-06, "loss": 0.4173, "step": 37350 }, { "epoch": 3.742024340161266, "grad_norm": 2.1645774841308594, "learning_rate": 7.440886280075887e-06, "loss": 0.4216, "step": 37360 }, { "epoch": 3.7430259928882657, "grad_norm": 2.005495309829712, "learning_rate": 7.429668172281803e-06, "loss": 0.4413, "step": 37370 }, { "epoch": 3.7440276456152652, "grad_norm": 2.228274345397949, "learning_rate": 7.418457051190173e-06, "loss": 0.3805, "step": 37380 }, { "epoch": 3.745029298342265, "grad_norm": 2.2738568782806396, "learning_rate": 7.407252921259008e-06, "loss": 0.4104, "step": 37390 }, { "epoch": 3.7460309510692644, "grad_norm": 2.142463445663452, "learning_rate": 7.396055786943543e-06, "loss": 0.4443, "step": 37400 }, { "epoch": 3.7470326037962636, "grad_norm": 2.024423122406006, "learning_rate": 7.3848656526962295e-06, "loss": 0.3953, "step": 37410 }, { "epoch": 3.7480342565232636, "grad_norm": 2.5991528034210205, "learning_rate": 7.373682522966735e-06, "loss": 0.4175, "step": 37420 }, { "epoch": 3.7490359092502628, "grad_norm": 1.819032907485962, "learning_rate": 7.362506402201932e-06, "loss": 0.46, "step": 37430 }, { "epoch": 3.7500375619772623, "grad_norm": 2.4464993476867676, "learning_rate": 7.351337294845942e-06, "loss": 0.4255, "step": 37440 }, { "epoch": 3.751039214704262, "grad_norm": 2.157233715057373, "learning_rate": 7.340175205340044e-06, "loss": 0.4405, "step": 37450 }, { "epoch": 3.7520408674312615, "grad_norm": 2.148372173309326, "learning_rate": 7.329020138122761e-06, "loss": 0.4301, "step": 37460 }, { "epoch": 3.753042520158261, "grad_norm": 2.149197578430176, "learning_rate": 7.317872097629836e-06, "loss": 0.4719, "step": 37470 }, { "epoch": 3.7540441728852607, "grad_norm": 2.0958821773529053, "learning_rate": 7.306731088294172e-06, "loss": 0.4272, "step": 37480 }, { "epoch": 3.7550458256122603, "grad_norm": 2.766040086746216, "learning_rate": 7.295597114545907e-06, "loss": 0.4578, "step": 37490 }, { "epoch": 3.75604747833926, "grad_norm": 2.5304043292999268, "learning_rate": 7.284470180812392e-06, "loss": 0.4636, "step": 37500 }, { "epoch": 3.7570491310662595, "grad_norm": 2.753812313079834, "learning_rate": 7.2733502915181604e-06, "loss": 0.4124, "step": 37510 }, { "epoch": 3.7580507837932586, "grad_norm": 1.706272840499878, "learning_rate": 7.262237451084938e-06, "loss": 0.3704, "step": 37520 }, { "epoch": 3.7590524365202587, "grad_norm": 2.44985294342041, "learning_rate": 7.251131663931652e-06, "loss": 0.4415, "step": 37530 }, { "epoch": 3.760054089247258, "grad_norm": 1.9321026802062988, "learning_rate": 7.240032934474447e-06, "loss": 0.4082, "step": 37540 }, { "epoch": 3.7610557419742574, "grad_norm": 2.1816911697387695, "learning_rate": 7.228941267126646e-06, "loss": 0.4284, "step": 37550 }, { "epoch": 3.762057394701257, "grad_norm": 2.1587181091308594, "learning_rate": 7.217856666298736e-06, "loss": 0.4536, "step": 37560 }, { "epoch": 3.7630590474282566, "grad_norm": 2.2277698516845703, "learning_rate": 7.206779136398445e-06, "loss": 0.3842, "step": 37570 }, { "epoch": 3.764060700155256, "grad_norm": 2.4164507389068604, "learning_rate": 7.195708681830665e-06, "loss": 0.3984, "step": 37580 }, { "epoch": 3.7650623528822558, "grad_norm": 2.4073264598846436, "learning_rate": 7.18464530699745e-06, "loss": 0.3684, "step": 37590 }, { "epoch": 3.7660640056092554, "grad_norm": 2.5888216495513916, "learning_rate": 7.1735890162980855e-06, "loss": 0.4355, "step": 37600 }, { "epoch": 3.767065658336255, "grad_norm": 2.342641592025757, "learning_rate": 7.162539814129013e-06, "loss": 0.4305, "step": 37610 }, { "epoch": 3.7680673110632545, "grad_norm": 2.2944300174713135, "learning_rate": 7.151497704883855e-06, "loss": 0.4233, "step": 37620 }, { "epoch": 3.7690689637902537, "grad_norm": 2.190720796585083, "learning_rate": 7.1404626929534206e-06, "loss": 0.4602, "step": 37630 }, { "epoch": 3.7700706165172537, "grad_norm": 2.474369764328003, "learning_rate": 7.129434782725697e-06, "loss": 0.4556, "step": 37640 }, { "epoch": 3.771072269244253, "grad_norm": 3.5644686222076416, "learning_rate": 7.118413978585839e-06, "loss": 0.4676, "step": 37650 }, { "epoch": 3.7720739219712525, "grad_norm": 2.2803492546081543, "learning_rate": 7.107400284916185e-06, "loss": 0.4021, "step": 37660 }, { "epoch": 3.773075574698252, "grad_norm": 2.6325793266296387, "learning_rate": 7.0963937060962435e-06, "loss": 0.4832, "step": 37670 }, { "epoch": 3.7740772274252516, "grad_norm": 2.1822357177734375, "learning_rate": 7.085394246502692e-06, "loss": 0.4158, "step": 37680 }, { "epoch": 3.7750788801522512, "grad_norm": 2.2507176399230957, "learning_rate": 7.074401910509376e-06, "loss": 0.4426, "step": 37690 }, { "epoch": 3.776080532879251, "grad_norm": 1.774174690246582, "learning_rate": 7.063416702487313e-06, "loss": 0.3955, "step": 37700 }, { "epoch": 3.7770821856062504, "grad_norm": 2.155637264251709, "learning_rate": 7.05243862680468e-06, "loss": 0.3967, "step": 37710 }, { "epoch": 3.77808383833325, "grad_norm": 2.492680311203003, "learning_rate": 7.041467687826825e-06, "loss": 0.4521, "step": 37720 }, { "epoch": 3.7790854910602496, "grad_norm": 1.9159152507781982, "learning_rate": 7.030503889916254e-06, "loss": 0.4482, "step": 37730 }, { "epoch": 3.7800871437872487, "grad_norm": 2.340388298034668, "learning_rate": 7.0195472374326335e-06, "loss": 0.4927, "step": 37740 }, { "epoch": 3.781088796514249, "grad_norm": 2.2713913917541504, "learning_rate": 7.008597734732786e-06, "loss": 0.4053, "step": 37750 }, { "epoch": 3.782090449241248, "grad_norm": 1.636338710784912, "learning_rate": 6.997655386170698e-06, "loss": 0.4071, "step": 37760 }, { "epoch": 3.7830921019682475, "grad_norm": 1.8299113512039185, "learning_rate": 6.986720196097507e-06, "loss": 0.4461, "step": 37770 }, { "epoch": 3.784093754695247, "grad_norm": 2.3852038383483887, "learning_rate": 6.9757921688615e-06, "loss": 0.4631, "step": 37780 }, { "epoch": 3.7850954074222467, "grad_norm": 2.396932363510132, "learning_rate": 6.964871308808118e-06, "loss": 0.4139, "step": 37790 }, { "epoch": 3.7860970601492463, "grad_norm": 1.9309535026550293, "learning_rate": 6.953957620279971e-06, "loss": 0.4465, "step": 37800 }, { "epoch": 3.787098712876246, "grad_norm": 2.1654818058013916, "learning_rate": 6.943051107616785e-06, "loss": 0.3709, "step": 37810 }, { "epoch": 3.7881003656032455, "grad_norm": 3.310075283050537, "learning_rate": 6.93215177515544e-06, "loss": 0.4081, "step": 37820 }, { "epoch": 3.789102018330245, "grad_norm": 2.255657434463501, "learning_rate": 6.921259627229989e-06, "loss": 0.4686, "step": 37830 }, { "epoch": 3.7901036710572447, "grad_norm": 1.773685336112976, "learning_rate": 6.910374668171607e-06, "loss": 0.4184, "step": 37840 }, { "epoch": 3.791105323784244, "grad_norm": 1.9942299127578735, "learning_rate": 6.899496902308592e-06, "loss": 0.4079, "step": 37850 }, { "epoch": 3.7921069765112434, "grad_norm": 2.451260805130005, "learning_rate": 6.888626333966405e-06, "loss": 0.4615, "step": 37860 }, { "epoch": 3.793108629238243, "grad_norm": 2.6214842796325684, "learning_rate": 6.877762967467666e-06, "loss": 0.4257, "step": 37870 }, { "epoch": 3.7941102819652426, "grad_norm": 2.6001267433166504, "learning_rate": 6.866906807132079e-06, "loss": 0.4699, "step": 37880 }, { "epoch": 3.795111934692242, "grad_norm": 2.7137014865875244, "learning_rate": 6.856057857276507e-06, "loss": 0.4248, "step": 37890 }, { "epoch": 3.7961135874192418, "grad_norm": 2.153409957885742, "learning_rate": 6.845216122214973e-06, "loss": 0.4262, "step": 37900 }, { "epoch": 3.7971152401462414, "grad_norm": 2.0230278968811035, "learning_rate": 6.834381606258597e-06, "loss": 0.4167, "step": 37910 }, { "epoch": 3.798116892873241, "grad_norm": 1.8823751211166382, "learning_rate": 6.823554313715619e-06, "loss": 0.4419, "step": 37920 }, { "epoch": 3.7991185456002405, "grad_norm": 2.529693126678467, "learning_rate": 6.81273424889145e-06, "loss": 0.4636, "step": 37930 }, { "epoch": 3.8001201983272397, "grad_norm": 2.7344167232513428, "learning_rate": 6.801921416088597e-06, "loss": 0.417, "step": 37940 }, { "epoch": 3.8011218510542397, "grad_norm": 2.9459967613220215, "learning_rate": 6.791115819606689e-06, "loss": 0.4455, "step": 37950 }, { "epoch": 3.802123503781239, "grad_norm": 2.277332305908203, "learning_rate": 6.780317463742492e-06, "loss": 0.4097, "step": 37960 }, { "epoch": 3.8031251565082385, "grad_norm": 2.0806617736816406, "learning_rate": 6.769526352789882e-06, "loss": 0.4105, "step": 37970 }, { "epoch": 3.804126809235238, "grad_norm": 2.3580572605133057, "learning_rate": 6.75874249103986e-06, "loss": 0.4319, "step": 37980 }, { "epoch": 3.8051284619622376, "grad_norm": 2.0618369579315186, "learning_rate": 6.7479658827805435e-06, "loss": 0.4417, "step": 37990 }, { "epoch": 3.8061301146892372, "grad_norm": 2.297889471054077, "learning_rate": 6.737196532297163e-06, "loss": 0.4363, "step": 38000 }, { "epoch": 3.807131767416237, "grad_norm": 1.9872561693191528, "learning_rate": 6.726434443872071e-06, "loss": 0.4912, "step": 38010 }, { "epoch": 3.8081334201432364, "grad_norm": 1.8478449583053589, "learning_rate": 6.7156796217847155e-06, "loss": 0.3763, "step": 38020 }, { "epoch": 3.809135072870236, "grad_norm": 1.7361677885055542, "learning_rate": 6.7049320703116756e-06, "loss": 0.4135, "step": 38030 }, { "epoch": 3.8101367255972356, "grad_norm": 2.6249921321868896, "learning_rate": 6.694191793726623e-06, "loss": 0.3936, "step": 38040 }, { "epoch": 3.8111383783242347, "grad_norm": 1.9055705070495605, "learning_rate": 6.683458796300349e-06, "loss": 0.4196, "step": 38050 }, { "epoch": 3.812140031051235, "grad_norm": 1.5801509618759155, "learning_rate": 6.672733082300739e-06, "loss": 0.3927, "step": 38060 }, { "epoch": 3.813141683778234, "grad_norm": 2.310429573059082, "learning_rate": 6.662014655992791e-06, "loss": 0.4781, "step": 38070 }, { "epoch": 3.8141433365052335, "grad_norm": 2.7247142791748047, "learning_rate": 6.651303521638599e-06, "loss": 0.4488, "step": 38080 }, { "epoch": 3.815144989232233, "grad_norm": 1.6246259212493896, "learning_rate": 6.640599683497364e-06, "loss": 0.3836, "step": 38090 }, { "epoch": 3.8161466419592327, "grad_norm": 1.9836045503616333, "learning_rate": 6.629903145825378e-06, "loss": 0.3884, "step": 38100 }, { "epoch": 3.8171482946862323, "grad_norm": 1.8660048246383667, "learning_rate": 6.619213912876038e-06, "loss": 0.3775, "step": 38110 }, { "epoch": 3.818149947413232, "grad_norm": 2.0846478939056396, "learning_rate": 6.608531988899822e-06, "loss": 0.4793, "step": 38120 }, { "epoch": 3.8191516001402315, "grad_norm": 2.101534366607666, "learning_rate": 6.5978573781443346e-06, "loss": 0.4689, "step": 38130 }, { "epoch": 3.820153252867231, "grad_norm": 2.1078479290008545, "learning_rate": 6.587190084854228e-06, "loss": 0.4624, "step": 38140 }, { "epoch": 3.8211549055942307, "grad_norm": 1.6181825399398804, "learning_rate": 6.576530113271265e-06, "loss": 0.4424, "step": 38150 }, { "epoch": 3.82215655832123, "grad_norm": 1.6215962171554565, "learning_rate": 6.565877467634324e-06, "loss": 0.4434, "step": 38160 }, { "epoch": 3.82315821104823, "grad_norm": 2.2983250617980957, "learning_rate": 6.5552321521793195e-06, "loss": 0.4694, "step": 38170 }, { "epoch": 3.824159863775229, "grad_norm": 2.1482555866241455, "learning_rate": 6.5445941711392845e-06, "loss": 0.4466, "step": 38180 }, { "epoch": 3.8251615165022286, "grad_norm": 2.240410804748535, "learning_rate": 6.533963528744319e-06, "loss": 0.4196, "step": 38190 }, { "epoch": 3.826163169229228, "grad_norm": 2.4457499980926514, "learning_rate": 6.523340229221639e-06, "loss": 0.4329, "step": 38200 }, { "epoch": 3.8271648219562278, "grad_norm": 2.4264252185821533, "learning_rate": 6.51272427679549e-06, "loss": 0.3891, "step": 38210 }, { "epoch": 3.8281664746832273, "grad_norm": 1.8337993621826172, "learning_rate": 6.5021156756872175e-06, "loss": 0.3847, "step": 38220 }, { "epoch": 3.829168127410227, "grad_norm": 3.0118589401245117, "learning_rate": 6.491514430115278e-06, "loss": 0.4514, "step": 38230 }, { "epoch": 3.8301697801372265, "grad_norm": 2.0712578296661377, "learning_rate": 6.480920544295141e-06, "loss": 0.4276, "step": 38240 }, { "epoch": 3.831171432864226, "grad_norm": 1.8239521980285645, "learning_rate": 6.4703340224393895e-06, "loss": 0.3844, "step": 38250 }, { "epoch": 3.8321730855912257, "grad_norm": 2.277188301086426, "learning_rate": 6.459754868757675e-06, "loss": 0.475, "step": 38260 }, { "epoch": 3.833174738318225, "grad_norm": 2.219277858734131, "learning_rate": 6.449183087456723e-06, "loss": 0.4694, "step": 38270 }, { "epoch": 3.834176391045225, "grad_norm": 2.4775564670562744, "learning_rate": 6.4386186827402884e-06, "loss": 0.4562, "step": 38280 }, { "epoch": 3.835178043772224, "grad_norm": 2.250697612762451, "learning_rate": 6.428061658809248e-06, "loss": 0.4658, "step": 38290 }, { "epoch": 3.8361796964992236, "grad_norm": 2.2010769844055176, "learning_rate": 6.41751201986151e-06, "loss": 0.4638, "step": 38300 }, { "epoch": 3.8371813492262232, "grad_norm": 2.2128689289093018, "learning_rate": 6.406969770092056e-06, "loss": 0.4676, "step": 38310 }, { "epoch": 3.838183001953223, "grad_norm": 2.170311450958252, "learning_rate": 6.396434913692928e-06, "loss": 0.4089, "step": 38320 }, { "epoch": 3.8391846546802224, "grad_norm": 2.4297749996185303, "learning_rate": 6.385907454853224e-06, "loss": 0.4224, "step": 38330 }, { "epoch": 3.840186307407222, "grad_norm": 1.6787548065185547, "learning_rate": 6.375387397759114e-06, "loss": 0.4002, "step": 38340 }, { "epoch": 3.8411879601342216, "grad_norm": 2.9497814178466797, "learning_rate": 6.364874746593793e-06, "loss": 0.4372, "step": 38350 }, { "epoch": 3.8421896128612207, "grad_norm": 2.112044334411621, "learning_rate": 6.354369505537555e-06, "loss": 0.4423, "step": 38360 }, { "epoch": 3.8431912655882208, "grad_norm": 1.6409683227539062, "learning_rate": 6.343871678767715e-06, "loss": 0.4345, "step": 38370 }, { "epoch": 3.84419291831522, "grad_norm": 2.136765241622925, "learning_rate": 6.333381270458655e-06, "loss": 0.4651, "step": 38380 }, { "epoch": 3.8451945710422195, "grad_norm": 2.24963641166687, "learning_rate": 6.322898284781801e-06, "loss": 0.4305, "step": 38390 }, { "epoch": 3.846196223769219, "grad_norm": 2.395167112350464, "learning_rate": 6.312422725905626e-06, "loss": 0.427, "step": 38400 }, { "epoch": 3.8471978764962187, "grad_norm": 2.715379238128662, "learning_rate": 6.3019545979956545e-06, "loss": 0.483, "step": 38410 }, { "epoch": 3.8481995292232183, "grad_norm": 2.4242758750915527, "learning_rate": 6.291493905214454e-06, "loss": 0.3938, "step": 38420 }, { "epoch": 3.849201181950218, "grad_norm": 2.016488790512085, "learning_rate": 6.281040651721637e-06, "loss": 0.4412, "step": 38430 }, { "epoch": 3.8502028346772175, "grad_norm": 2.018977403640747, "learning_rate": 6.270594841673852e-06, "loss": 0.4535, "step": 38440 }, { "epoch": 3.851204487404217, "grad_norm": 2.2235140800476074, "learning_rate": 6.260156479224797e-06, "loss": 0.4212, "step": 38450 }, { "epoch": 3.8522061401312166, "grad_norm": 2.009124279022217, "learning_rate": 6.2497255685251995e-06, "loss": 0.3964, "step": 38460 }, { "epoch": 3.853207792858216, "grad_norm": 2.90751576423645, "learning_rate": 6.239302113722833e-06, "loss": 0.4423, "step": 38470 }, { "epoch": 3.854209445585216, "grad_norm": 2.9282796382904053, "learning_rate": 6.228886118962493e-06, "loss": 0.4727, "step": 38480 }, { "epoch": 3.855211098312215, "grad_norm": 2.084012985229492, "learning_rate": 6.218477588386035e-06, "loss": 0.456, "step": 38490 }, { "epoch": 3.8562127510392146, "grad_norm": 2.1381776332855225, "learning_rate": 6.208076526132306e-06, "loss": 0.4246, "step": 38500 }, { "epoch": 3.857214403766214, "grad_norm": 1.9279459714889526, "learning_rate": 6.197682936337218e-06, "loss": 0.404, "step": 38510 }, { "epoch": 3.8582160564932138, "grad_norm": 1.9992539882659912, "learning_rate": 6.187296823133698e-06, "loss": 0.4197, "step": 38520 }, { "epoch": 3.8592177092202133, "grad_norm": 1.7238306999206543, "learning_rate": 6.1769181906516955e-06, "loss": 0.3974, "step": 38530 }, { "epoch": 3.860219361947213, "grad_norm": 1.9684339761734009, "learning_rate": 6.1665470430181975e-06, "loss": 0.4424, "step": 38540 }, { "epoch": 3.8612210146742125, "grad_norm": 2.5566136837005615, "learning_rate": 6.1561833843571975e-06, "loss": 0.4095, "step": 38550 }, { "epoch": 3.862222667401212, "grad_norm": 2.300997495651245, "learning_rate": 6.1458272187897455e-06, "loss": 0.4618, "step": 38560 }, { "epoch": 3.8632243201282117, "grad_norm": 2.2941536903381348, "learning_rate": 6.135478550433865e-06, "loss": 0.4177, "step": 38570 }, { "epoch": 3.864225972855211, "grad_norm": 1.9589506387710571, "learning_rate": 6.125137383404622e-06, "loss": 0.4183, "step": 38580 }, { "epoch": 3.865227625582211, "grad_norm": 1.2186977863311768, "learning_rate": 6.114803721814114e-06, "loss": 0.4134, "step": 38590 }, { "epoch": 3.86622927830921, "grad_norm": 1.7170480489730835, "learning_rate": 6.104477569771439e-06, "loss": 0.3788, "step": 38600 }, { "epoch": 3.8672309310362096, "grad_norm": 3.361743211746216, "learning_rate": 6.094158931382685e-06, "loss": 0.4188, "step": 38610 }, { "epoch": 3.868232583763209, "grad_norm": 2.1883580684661865, "learning_rate": 6.083847810751004e-06, "loss": 0.4335, "step": 38620 }, { "epoch": 3.869234236490209, "grad_norm": 1.248789668083191, "learning_rate": 6.073544211976523e-06, "loss": 0.4231, "step": 38630 }, { "epoch": 3.8702358892172084, "grad_norm": 2.8954248428344727, "learning_rate": 6.063248139156372e-06, "loss": 0.4248, "step": 38640 }, { "epoch": 3.871237541944208, "grad_norm": 1.9824678897857666, "learning_rate": 6.052959596384719e-06, "loss": 0.4393, "step": 38650 }, { "epoch": 3.8722391946712076, "grad_norm": 2.0736277103424072, "learning_rate": 6.042678587752718e-06, "loss": 0.3752, "step": 38660 }, { "epoch": 3.873240847398207, "grad_norm": 2.2819983959198, "learning_rate": 6.032405117348533e-06, "loss": 0.4167, "step": 38670 }, { "epoch": 3.8742425001252068, "grad_norm": 2.657353639602661, "learning_rate": 6.022139189257306e-06, "loss": 0.4253, "step": 38680 }, { "epoch": 3.875244152852206, "grad_norm": 2.045306921005249, "learning_rate": 6.011880807561227e-06, "loss": 0.398, "step": 38690 }, { "epoch": 3.876245805579206, "grad_norm": 1.6201101541519165, "learning_rate": 6.001629976339448e-06, "loss": 0.4137, "step": 38700 }, { "epoch": 3.877247458306205, "grad_norm": 2.2877581119537354, "learning_rate": 5.991386699668136e-06, "loss": 0.3722, "step": 38710 }, { "epoch": 3.8782491110332047, "grad_norm": 1.7606736421585083, "learning_rate": 5.981150981620443e-06, "loss": 0.3913, "step": 38720 }, { "epoch": 3.8792507637602043, "grad_norm": 2.2930450439453125, "learning_rate": 5.970922826266523e-06, "loss": 0.4395, "step": 38730 }, { "epoch": 3.880252416487204, "grad_norm": 2.470388650894165, "learning_rate": 5.960702237673521e-06, "loss": 0.4097, "step": 38740 }, { "epoch": 3.8812540692142035, "grad_norm": 2.244858503341675, "learning_rate": 5.950489219905572e-06, "loss": 0.4404, "step": 38750 }, { "epoch": 3.882255721941203, "grad_norm": 2.1141445636749268, "learning_rate": 5.9402837770238e-06, "loss": 0.437, "step": 38760 }, { "epoch": 3.8832573746682026, "grad_norm": 2.3806586265563965, "learning_rate": 5.930085913086322e-06, "loss": 0.432, "step": 38770 }, { "epoch": 3.8842590273952022, "grad_norm": 2.2542169094085693, "learning_rate": 5.919895632148231e-06, "loss": 0.3765, "step": 38780 }, { "epoch": 3.885260680122202, "grad_norm": 2.2765283584594727, "learning_rate": 5.909712938261616e-06, "loss": 0.4707, "step": 38790 }, { "epoch": 3.886262332849201, "grad_norm": 1.9423617124557495, "learning_rate": 5.899537835475544e-06, "loss": 0.4707, "step": 38800 }, { "epoch": 3.887263985576201, "grad_norm": 2.1070590019226074, "learning_rate": 5.889370327836061e-06, "loss": 0.4737, "step": 38810 }, { "epoch": 3.8882656383032, "grad_norm": 2.498217821121216, "learning_rate": 5.879210419386197e-06, "loss": 0.3988, "step": 38820 }, { "epoch": 3.8892672910301997, "grad_norm": 2.292095184326172, "learning_rate": 5.869058114165956e-06, "loss": 0.444, "step": 38830 }, { "epoch": 3.8902689437571993, "grad_norm": 2.990748882293701, "learning_rate": 5.858913416212325e-06, "loss": 0.4561, "step": 38840 }, { "epoch": 3.891270596484199, "grad_norm": 2.656426429748535, "learning_rate": 5.848776329559261e-06, "loss": 0.4223, "step": 38850 }, { "epoch": 3.8922722492111985, "grad_norm": 2.1715846061706543, "learning_rate": 5.838646858237695e-06, "loss": 0.4408, "step": 38860 }, { "epoch": 3.893273901938198, "grad_norm": 2.254647970199585, "learning_rate": 5.828525006275532e-06, "loss": 0.4076, "step": 38870 }, { "epoch": 3.8942755546651977, "grad_norm": 3.026210308074951, "learning_rate": 5.818410777697639e-06, "loss": 0.4982, "step": 38880 }, { "epoch": 3.895277207392197, "grad_norm": 1.857161521911621, "learning_rate": 5.808304176525875e-06, "loss": 0.458, "step": 38890 }, { "epoch": 3.896278860119197, "grad_norm": 1.916955828666687, "learning_rate": 5.798205206779033e-06, "loss": 0.3936, "step": 38900 }, { "epoch": 3.897280512846196, "grad_norm": 2.5912342071533203, "learning_rate": 5.788113872472886e-06, "loss": 0.4142, "step": 38910 }, { "epoch": 3.8982821655731956, "grad_norm": 2.2807843685150146, "learning_rate": 5.778030177620198e-06, "loss": 0.471, "step": 38920 }, { "epoch": 3.899283818300195, "grad_norm": 1.8373711109161377, "learning_rate": 5.767954126230641e-06, "loss": 0.4649, "step": 38930 }, { "epoch": 3.900285471027195, "grad_norm": 2.2042641639709473, "learning_rate": 5.757885722310882e-06, "loss": 0.4439, "step": 38940 }, { "epoch": 3.9012871237541944, "grad_norm": 1.9236149787902832, "learning_rate": 5.747824969864554e-06, "loss": 0.4517, "step": 38950 }, { "epoch": 3.902288776481194, "grad_norm": 2.47882080078125, "learning_rate": 5.7377718728922365e-06, "loss": 0.4453, "step": 38960 }, { "epoch": 3.9032904292081936, "grad_norm": 1.873042106628418, "learning_rate": 5.72772643539144e-06, "loss": 0.4618, "step": 38970 }, { "epoch": 3.904292081935193, "grad_norm": 2.677980661392212, "learning_rate": 5.7176886613566735e-06, "loss": 0.4366, "step": 38980 }, { "epoch": 3.9052937346621928, "grad_norm": 2.4638803005218506, "learning_rate": 5.707658554779374e-06, "loss": 0.4097, "step": 38990 }, { "epoch": 3.906295387389192, "grad_norm": 2.025956869125366, "learning_rate": 5.697636119647939e-06, "loss": 0.4124, "step": 39000 }, { "epoch": 3.907297040116192, "grad_norm": 1.7988700866699219, "learning_rate": 5.68762135994769e-06, "loss": 0.3835, "step": 39010 }, { "epoch": 3.908298692843191, "grad_norm": 1.9764509201049805, "learning_rate": 5.677614279660934e-06, "loss": 0.4574, "step": 39020 }, { "epoch": 3.9093003455701907, "grad_norm": 2.4781711101531982, "learning_rate": 5.667614882766908e-06, "loss": 0.4962, "step": 39030 }, { "epoch": 3.9103019982971903, "grad_norm": 1.637224555015564, "learning_rate": 5.6576231732417745e-06, "loss": 0.4241, "step": 39040 }, { "epoch": 3.91130365102419, "grad_norm": 2.451122522354126, "learning_rate": 5.647639155058676e-06, "loss": 0.412, "step": 39050 }, { "epoch": 3.9123053037511895, "grad_norm": 1.8591203689575195, "learning_rate": 5.63766283218767e-06, "loss": 0.4624, "step": 39060 }, { "epoch": 3.913306956478189, "grad_norm": 2.224898099899292, "learning_rate": 5.627694208595763e-06, "loss": 0.4359, "step": 39070 }, { "epoch": 3.9143086092051886, "grad_norm": 2.6811466217041016, "learning_rate": 5.617733288246898e-06, "loss": 0.3711, "step": 39080 }, { "epoch": 3.9153102619321882, "grad_norm": 2.6933958530426025, "learning_rate": 5.607780075101956e-06, "loss": 0.4493, "step": 39090 }, { "epoch": 3.916311914659188, "grad_norm": 1.5979939699172974, "learning_rate": 5.597834573118754e-06, "loss": 0.3847, "step": 39100 }, { "epoch": 3.917313567386187, "grad_norm": 2.687382698059082, "learning_rate": 5.587896786252039e-06, "loss": 0.4065, "step": 39110 }, { "epoch": 3.918315220113187, "grad_norm": 2.6313157081604004, "learning_rate": 5.577966718453495e-06, "loss": 0.4204, "step": 39120 }, { "epoch": 3.919316872840186, "grad_norm": 2.1213088035583496, "learning_rate": 5.5680443736717325e-06, "loss": 0.4447, "step": 39130 }, { "epoch": 3.9203185255671857, "grad_norm": 2.800734281539917, "learning_rate": 5.558129755852295e-06, "loss": 0.42, "step": 39140 }, { "epoch": 3.9213201782941853, "grad_norm": 2.896596908569336, "learning_rate": 5.548222868937649e-06, "loss": 0.453, "step": 39150 }, { "epoch": 3.922321831021185, "grad_norm": 2.1709372997283936, "learning_rate": 5.538323716867194e-06, "loss": 0.4739, "step": 39160 }, { "epoch": 3.9233234837481845, "grad_norm": 2.2188522815704346, "learning_rate": 5.528432303577244e-06, "loss": 0.432, "step": 39170 }, { "epoch": 3.924325136475184, "grad_norm": 2.2879672050476074, "learning_rate": 5.518548633001039e-06, "loss": 0.5457, "step": 39180 }, { "epoch": 3.9253267892021837, "grad_norm": 2.0803894996643066, "learning_rate": 5.508672709068746e-06, "loss": 0.3965, "step": 39190 }, { "epoch": 3.9263284419291833, "grad_norm": 1.8538846969604492, "learning_rate": 5.498804535707447e-06, "loss": 0.4526, "step": 39200 }, { "epoch": 3.927330094656183, "grad_norm": 2.3934788703918457, "learning_rate": 5.488944116841144e-06, "loss": 0.3509, "step": 39210 }, { "epoch": 3.928331747383182, "grad_norm": 1.6396085023880005, "learning_rate": 5.4790914563907474e-06, "loss": 0.4732, "step": 39220 }, { "epoch": 3.929333400110182, "grad_norm": 1.8581525087356567, "learning_rate": 5.469246558274096e-06, "loss": 0.4159, "step": 39230 }, { "epoch": 3.930335052837181, "grad_norm": 2.8066928386688232, "learning_rate": 5.459409426405926e-06, "loss": 0.493, "step": 39240 }, { "epoch": 3.931336705564181, "grad_norm": 2.4748148918151855, "learning_rate": 5.449580064697915e-06, "loss": 0.4374, "step": 39250 }, { "epoch": 3.9323383582911804, "grad_norm": 2.191162586212158, "learning_rate": 5.43975847705861e-06, "loss": 0.4382, "step": 39260 }, { "epoch": 3.93334001101818, "grad_norm": 2.5699470043182373, "learning_rate": 5.429944667393486e-06, "loss": 0.3827, "step": 39270 }, { "epoch": 3.9343416637451796, "grad_norm": 2.012481689453125, "learning_rate": 5.420138639604947e-06, "loss": 0.4211, "step": 39280 }, { "epoch": 3.935343316472179, "grad_norm": 2.005993127822876, "learning_rate": 5.410340397592262e-06, "loss": 0.4031, "step": 39290 }, { "epoch": 3.9363449691991788, "grad_norm": 2.065284490585327, "learning_rate": 5.4005499452516234e-06, "loss": 0.4853, "step": 39300 }, { "epoch": 3.9373466219261783, "grad_norm": 3.1377053260803223, "learning_rate": 5.3907672864761395e-06, "loss": 0.4342, "step": 39310 }, { "epoch": 3.938348274653178, "grad_norm": 1.8640254735946655, "learning_rate": 5.380992425155809e-06, "loss": 0.4401, "step": 39320 }, { "epoch": 3.939349927380177, "grad_norm": 2.006674289703369, "learning_rate": 5.371225365177513e-06, "loss": 0.3803, "step": 39330 }, { "epoch": 3.9403515801071767, "grad_norm": 1.887819766998291, "learning_rate": 5.361466110425045e-06, "loss": 0.4175, "step": 39340 }, { "epoch": 3.9413532328341763, "grad_norm": 1.9000790119171143, "learning_rate": 5.351714664779106e-06, "loss": 0.4634, "step": 39350 }, { "epoch": 3.942354885561176, "grad_norm": 2.495184898376465, "learning_rate": 5.341971032117285e-06, "loss": 0.4234, "step": 39360 }, { "epoch": 3.9433565382881755, "grad_norm": 2.152071237564087, "learning_rate": 5.332235216314035e-06, "loss": 0.3666, "step": 39370 }, { "epoch": 3.944358191015175, "grad_norm": 2.0491833686828613, "learning_rate": 5.32250722124075e-06, "loss": 0.4581, "step": 39380 }, { "epoch": 3.9453598437421746, "grad_norm": 2.057877540588379, "learning_rate": 5.312787050765689e-06, "loss": 0.4422, "step": 39390 }, { "epoch": 3.9463614964691742, "grad_norm": 2.5603151321411133, "learning_rate": 5.303074708753977e-06, "loss": 0.4315, "step": 39400 }, { "epoch": 3.947363149196174, "grad_norm": 2.4202048778533936, "learning_rate": 5.293370199067671e-06, "loss": 0.4272, "step": 39410 }, { "epoch": 3.948364801923173, "grad_norm": 1.896180510520935, "learning_rate": 5.283673525565688e-06, "loss": 0.3761, "step": 39420 }, { "epoch": 3.949366454650173, "grad_norm": 2.138786554336548, "learning_rate": 5.273984692103831e-06, "loss": 0.418, "step": 39430 }, { "epoch": 3.950368107377172, "grad_norm": 2.09519100189209, "learning_rate": 5.264303702534784e-06, "loss": 0.4182, "step": 39440 }, { "epoch": 3.9513697601041717, "grad_norm": 2.606905460357666, "learning_rate": 5.254630560708123e-06, "loss": 0.4222, "step": 39450 }, { "epoch": 3.9523714128311713, "grad_norm": 2.679440498352051, "learning_rate": 5.24496527047029e-06, "loss": 0.4478, "step": 39460 }, { "epoch": 3.953373065558171, "grad_norm": 2.5487256050109863, "learning_rate": 5.23530783566461e-06, "loss": 0.373, "step": 39470 }, { "epoch": 3.9543747182851705, "grad_norm": 2.0131003856658936, "learning_rate": 5.225658260131289e-06, "loss": 0.3875, "step": 39480 }, { "epoch": 3.95537637101217, "grad_norm": 1.9472322463989258, "learning_rate": 5.216016547707403e-06, "loss": 0.38, "step": 39490 }, { "epoch": 3.9563780237391697, "grad_norm": 1.9929423332214355, "learning_rate": 5.2063827022269e-06, "loss": 0.4123, "step": 39500 }, { "epoch": 3.9573796764661693, "grad_norm": 2.648130416870117, "learning_rate": 5.196756727520602e-06, "loss": 0.4668, "step": 39510 }, { "epoch": 3.958381329193169, "grad_norm": 2.119204521179199, "learning_rate": 5.187138627416202e-06, "loss": 0.3989, "step": 39520 }, { "epoch": 3.959382981920168, "grad_norm": 2.0803627967834473, "learning_rate": 5.177528405738261e-06, "loss": 0.3887, "step": 39530 }, { "epoch": 3.960384634647168, "grad_norm": 2.0360963344573975, "learning_rate": 5.167926066308207e-06, "loss": 0.4147, "step": 39540 }, { "epoch": 3.961386287374167, "grad_norm": 2.736513376235962, "learning_rate": 5.158331612944337e-06, "loss": 0.4734, "step": 39550 }, { "epoch": 3.962387940101167, "grad_norm": 2.068869113922119, "learning_rate": 5.1487450494618004e-06, "loss": 0.4155, "step": 39560 }, { "epoch": 3.9633895928281664, "grad_norm": 2.3239376544952393, "learning_rate": 5.139166379672627e-06, "loss": 0.4444, "step": 39570 }, { "epoch": 3.964391245555166, "grad_norm": 2.593562602996826, "learning_rate": 5.129595607385693e-06, "loss": 0.4024, "step": 39580 }, { "epoch": 3.9653928982821656, "grad_norm": 1.6795233488082886, "learning_rate": 5.120032736406744e-06, "loss": 0.4005, "step": 39590 }, { "epoch": 3.966394551009165, "grad_norm": 1.533920407295227, "learning_rate": 5.110477770538366e-06, "loss": 0.5235, "step": 39600 }, { "epoch": 3.9673962037361648, "grad_norm": 2.483339309692383, "learning_rate": 5.100930713580044e-06, "loss": 0.4192, "step": 39610 }, { "epoch": 3.9683978564631643, "grad_norm": 2.7518630027770996, "learning_rate": 5.09139156932806e-06, "loss": 0.4448, "step": 39620 }, { "epoch": 3.969399509190164, "grad_norm": 2.8066458702087402, "learning_rate": 5.081860341575584e-06, "loss": 0.3969, "step": 39630 }, { "epoch": 3.970401161917163, "grad_norm": 2.323917865753174, "learning_rate": 5.072337034112645e-06, "loss": 0.4005, "step": 39640 }, { "epoch": 3.971402814644163, "grad_norm": 1.8697916269302368, "learning_rate": 5.062821650726113e-06, "loss": 0.361, "step": 39650 }, { "epoch": 3.9724044673711623, "grad_norm": 2.3262734413146973, "learning_rate": 5.053314195199685e-06, "loss": 0.4364, "step": 39660 }, { "epoch": 3.973406120098162, "grad_norm": 2.201371192932129, "learning_rate": 5.043814671313932e-06, "loss": 0.4244, "step": 39670 }, { "epoch": 3.9744077728251614, "grad_norm": 2.123957633972168, "learning_rate": 5.0343230828462764e-06, "loss": 0.366, "step": 39680 }, { "epoch": 3.975409425552161, "grad_norm": 2.200098752975464, "learning_rate": 5.02483943357096e-06, "loss": 0.503, "step": 39690 }, { "epoch": 3.9764110782791606, "grad_norm": 1.9379470348358154, "learning_rate": 5.015363727259076e-06, "loss": 0.4664, "step": 39700 }, { "epoch": 3.97741273100616, "grad_norm": 1.8062063455581665, "learning_rate": 5.0058959676785785e-06, "loss": 0.4212, "step": 39710 }, { "epoch": 3.97841438373316, "grad_norm": 2.2486565113067627, "learning_rate": 4.996436158594245e-06, "loss": 0.4427, "step": 39720 }, { "epoch": 3.9794160364601594, "grad_norm": 2.03916597366333, "learning_rate": 4.986984303767675e-06, "loss": 0.4555, "step": 39730 }, { "epoch": 3.980417689187159, "grad_norm": 1.792839527130127, "learning_rate": 4.9775404069573425e-06, "loss": 0.3701, "step": 39740 }, { "epoch": 3.981419341914158, "grad_norm": 2.415205240249634, "learning_rate": 4.968104471918533e-06, "loss": 0.5279, "step": 39750 }, { "epoch": 3.982420994641158, "grad_norm": 1.9716744422912598, "learning_rate": 4.958676502403367e-06, "loss": 0.4309, "step": 39760 }, { "epoch": 3.9834226473681573, "grad_norm": 1.593368649482727, "learning_rate": 4.949256502160804e-06, "loss": 0.4196, "step": 39770 }, { "epoch": 3.984424300095157, "grad_norm": 2.1170709133148193, "learning_rate": 4.939844474936634e-06, "loss": 0.4474, "step": 39780 }, { "epoch": 3.9854259528221565, "grad_norm": 2.5782203674316406, "learning_rate": 4.930440424473467e-06, "loss": 0.4556, "step": 39790 }, { "epoch": 3.986427605549156, "grad_norm": 2.213399887084961, "learning_rate": 4.921044354510759e-06, "loss": 0.4286, "step": 39800 }, { "epoch": 3.9874292582761557, "grad_norm": 2.359858989715576, "learning_rate": 4.911656268784775e-06, "loss": 0.4304, "step": 39810 }, { "epoch": 3.9884309110031553, "grad_norm": 2.288480281829834, "learning_rate": 4.902276171028617e-06, "loss": 0.3837, "step": 39820 }, { "epoch": 3.989432563730155, "grad_norm": 2.2940833568573, "learning_rate": 4.8929040649722e-06, "loss": 0.4507, "step": 39830 }, { "epoch": 3.9904342164571545, "grad_norm": 2.6430327892303467, "learning_rate": 4.883539954342276e-06, "loss": 0.3704, "step": 39840 }, { "epoch": 3.991435869184154, "grad_norm": 2.3419792652130127, "learning_rate": 4.874183842862401e-06, "loss": 0.4236, "step": 39850 }, { "epoch": 3.992437521911153, "grad_norm": 2.221893310546875, "learning_rate": 4.864835734252962e-06, "loss": 0.4163, "step": 39860 }, { "epoch": 3.993439174638153, "grad_norm": 2.3750827312469482, "learning_rate": 4.855495632231161e-06, "loss": 0.3974, "step": 39870 }, { "epoch": 3.9944408273651524, "grad_norm": 1.812092900276184, "learning_rate": 4.846163540511011e-06, "loss": 0.43, "step": 39880 }, { "epoch": 3.995442480092152, "grad_norm": 2.505802631378174, "learning_rate": 4.836839462803347e-06, "loss": 0.3909, "step": 39890 }, { "epoch": 3.9964441328191516, "grad_norm": 2.1492509841918945, "learning_rate": 4.827523402815815e-06, "loss": 0.4526, "step": 39900 }, { "epoch": 3.997445785546151, "grad_norm": 2.6524550914764404, "learning_rate": 4.818215364252871e-06, "loss": 0.4235, "step": 39910 }, { "epoch": 3.9984474382731507, "grad_norm": 2.4931113719940186, "learning_rate": 4.808915350815779e-06, "loss": 0.4216, "step": 39920 }, { "epoch": 3.9994490910001503, "grad_norm": 2.9122989177703857, "learning_rate": 4.799623366202615e-06, "loss": 0.4228, "step": 39930 }, { "epoch": 4.0004006610908, "grad_norm": 2.093388319015503, "learning_rate": 4.790339414108278e-06, "loss": 0.3735, "step": 39940 }, { "epoch": 4.001402313817799, "grad_norm": 2.304086685180664, "learning_rate": 4.781063498224439e-06, "loss": 0.4207, "step": 39950 }, { "epoch": 4.002403966544799, "grad_norm": 1.982155442237854, "learning_rate": 4.771795622239592e-06, "loss": 0.3568, "step": 39960 }, { "epoch": 4.003405619271798, "grad_norm": 2.5733442306518555, "learning_rate": 4.762535789839054e-06, "loss": 0.4407, "step": 39970 }, { "epoch": 4.004407271998798, "grad_norm": 2.412712812423706, "learning_rate": 4.753284004704902e-06, "loss": 0.4066, "step": 39980 }, { "epoch": 4.005408924725797, "grad_norm": 1.9959261417388916, "learning_rate": 4.7440402705160425e-06, "loss": 0.3478, "step": 39990 }, { "epoch": 4.006410577452797, "grad_norm": 2.1611344814300537, "learning_rate": 4.734804590948169e-06, "loss": 0.4195, "step": 40000 }, { "epoch": 4.006410577452797, "eval_bleu": 0.3662565942705902, "eval_loss": 0.5082083940505981, "eval_rouge1": 0.7014509543822172, "eval_rouge2": 0.5362548944715757, "eval_rougeL": 0.6602146413463208, "eval_runtime": 86792.7268, "eval_samples_per_second": 0.204, "eval_steps_per_second": 0.026, "eval_wer": 0.7582390590348932, "step": 40000 }, { "epoch": 4.0074122301797965, "grad_norm": 2.3167426586151123, "learning_rate": 4.725576969673789e-06, "loss": 0.4305, "step": 40010 }, { "epoch": 4.008413882906797, "grad_norm": 2.5307557582855225, "learning_rate": 4.7163574103621825e-06, "loss": 0.3887, "step": 40020 }, { "epoch": 4.009415535633796, "grad_norm": 1.9515817165374756, "learning_rate": 4.707145916679426e-06, "loss": 0.4566, "step": 40030 }, { "epoch": 4.010417188360796, "grad_norm": 2.6875641345977783, "learning_rate": 4.6979424922884096e-06, "loss": 0.3808, "step": 40040 }, { "epoch": 4.011418841087795, "grad_norm": 2.634347677230835, "learning_rate": 4.688747140848807e-06, "loss": 0.3669, "step": 40050 }, { "epoch": 4.012420493814794, "grad_norm": 2.083259344100952, "learning_rate": 4.679559866017052e-06, "loss": 0.4264, "step": 40060 }, { "epoch": 4.013422146541794, "grad_norm": 2.376185655593872, "learning_rate": 4.670380671446412e-06, "loss": 0.379, "step": 40070 }, { "epoch": 4.014423799268793, "grad_norm": 2.035813570022583, "learning_rate": 4.661209560786922e-06, "loss": 0.3986, "step": 40080 }, { "epoch": 4.015425451995793, "grad_norm": 1.9220411777496338, "learning_rate": 4.652046537685381e-06, "loss": 0.3928, "step": 40090 }, { "epoch": 4.016427104722792, "grad_norm": 2.600371837615967, "learning_rate": 4.642891605785413e-06, "loss": 0.3932, "step": 40100 }, { "epoch": 4.017428757449792, "grad_norm": 1.7663367986679077, "learning_rate": 4.633744768727394e-06, "loss": 0.3915, "step": 40110 }, { "epoch": 4.018430410176792, "grad_norm": 1.809329867362976, "learning_rate": 4.624606030148493e-06, "loss": 0.3715, "step": 40120 }, { "epoch": 4.019432062903792, "grad_norm": 2.3340373039245605, "learning_rate": 4.615475393682655e-06, "loss": 0.3847, "step": 40130 }, { "epoch": 4.020433715630791, "grad_norm": 2.9561421871185303, "learning_rate": 4.606352862960606e-06, "loss": 0.4141, "step": 40140 }, { "epoch": 4.02143536835779, "grad_norm": 2.7550601959228516, "learning_rate": 4.597238441609855e-06, "loss": 0.3774, "step": 40150 }, { "epoch": 4.02243702108479, "grad_norm": 2.112523317337036, "learning_rate": 4.588132133254658e-06, "loss": 0.3566, "step": 40160 }, { "epoch": 4.023438673811789, "grad_norm": 2.55678653717041, "learning_rate": 4.579033941516087e-06, "loss": 0.4126, "step": 40170 }, { "epoch": 4.024440326538789, "grad_norm": 2.198493719100952, "learning_rate": 4.569943870011956e-06, "loss": 0.3802, "step": 40180 }, { "epoch": 4.025441979265788, "grad_norm": 2.324162006378174, "learning_rate": 4.560861922356863e-06, "loss": 0.4629, "step": 40190 }, { "epoch": 4.026443631992788, "grad_norm": 1.9970417022705078, "learning_rate": 4.551788102162172e-06, "loss": 0.4426, "step": 40200 }, { "epoch": 4.0274452847197875, "grad_norm": 2.143714427947998, "learning_rate": 4.54272241303601e-06, "loss": 0.4101, "step": 40210 }, { "epoch": 4.0284469374467875, "grad_norm": 1.9812605381011963, "learning_rate": 4.5336648585832835e-06, "loss": 0.3751, "step": 40220 }, { "epoch": 4.029448590173787, "grad_norm": 2.0631392002105713, "learning_rate": 4.524615442405652e-06, "loss": 0.3368, "step": 40230 }, { "epoch": 4.030450242900787, "grad_norm": 1.7811130285263062, "learning_rate": 4.5155741681015465e-06, "loss": 0.3955, "step": 40240 }, { "epoch": 4.031451895627786, "grad_norm": 2.1916298866271973, "learning_rate": 4.506541039266154e-06, "loss": 0.4335, "step": 40250 }, { "epoch": 4.032453548354785, "grad_norm": 2.283879518508911, "learning_rate": 4.49751605949143e-06, "loss": 0.4194, "step": 40260 }, { "epoch": 4.033455201081785, "grad_norm": 2.2963759899139404, "learning_rate": 4.4884992323660835e-06, "loss": 0.4499, "step": 40270 }, { "epoch": 4.034456853808784, "grad_norm": 1.6694716215133667, "learning_rate": 4.479490561475585e-06, "loss": 0.4056, "step": 40280 }, { "epoch": 4.035458506535784, "grad_norm": 2.5013225078582764, "learning_rate": 4.470490050402154e-06, "loss": 0.4222, "step": 40290 }, { "epoch": 4.036460159262783, "grad_norm": 2.4829888343811035, "learning_rate": 4.4614977027247924e-06, "loss": 0.4054, "step": 40300 }, { "epoch": 4.037461811989783, "grad_norm": 2.05537748336792, "learning_rate": 4.452513522019214e-06, "loss": 0.4458, "step": 40310 }, { "epoch": 4.0384634647167825, "grad_norm": 2.26316499710083, "learning_rate": 4.443537511857915e-06, "loss": 0.4931, "step": 40320 }, { "epoch": 4.039465117443783, "grad_norm": 2.514526605606079, "learning_rate": 4.434569675810132e-06, "loss": 0.4126, "step": 40330 }, { "epoch": 4.040466770170782, "grad_norm": 1.9930896759033203, "learning_rate": 4.425610017441855e-06, "loss": 0.4017, "step": 40340 }, { "epoch": 4.041468422897782, "grad_norm": 2.2072863578796387, "learning_rate": 4.416658540315824e-06, "loss": 0.4262, "step": 40350 }, { "epoch": 4.042470075624781, "grad_norm": 2.070967435836792, "learning_rate": 4.4077152479915115e-06, "loss": 0.4335, "step": 40360 }, { "epoch": 4.04347172835178, "grad_norm": 2.4259519577026367, "learning_rate": 4.398780144025169e-06, "loss": 0.3856, "step": 40370 }, { "epoch": 4.04447338107878, "grad_norm": 2.0865633487701416, "learning_rate": 4.3898532319697455e-06, "loss": 0.3952, "step": 40380 }, { "epoch": 4.045475033805779, "grad_norm": 2.032640218734741, "learning_rate": 4.38093451537496e-06, "loss": 0.3963, "step": 40390 }, { "epoch": 4.046476686532779, "grad_norm": 2.034989595413208, "learning_rate": 4.372023997787284e-06, "loss": 0.3822, "step": 40400 }, { "epoch": 4.047478339259778, "grad_norm": 2.2075562477111816, "learning_rate": 4.363121682749907e-06, "loss": 0.4047, "step": 40410 }, { "epoch": 4.048479991986778, "grad_norm": 1.8768295049667358, "learning_rate": 4.354227573802752e-06, "loss": 0.3965, "step": 40420 }, { "epoch": 4.049481644713778, "grad_norm": 1.8903871774673462, "learning_rate": 4.345341674482503e-06, "loss": 0.4633, "step": 40430 }, { "epoch": 4.050483297440778, "grad_norm": 1.9325599670410156, "learning_rate": 4.336463988322572e-06, "loss": 0.405, "step": 40440 }, { "epoch": 4.051484950167777, "grad_norm": 2.1987829208374023, "learning_rate": 4.327594518853081e-06, "loss": 0.4235, "step": 40450 }, { "epoch": 4.052486602894777, "grad_norm": 2.2805888652801514, "learning_rate": 4.318733269600919e-06, "loss": 0.4807, "step": 40460 }, { "epoch": 4.053488255621776, "grad_norm": 2.4149937629699707, "learning_rate": 4.30988024408969e-06, "loss": 0.4326, "step": 40470 }, { "epoch": 4.054489908348775, "grad_norm": 1.7438945770263672, "learning_rate": 4.3010354458397295e-06, "loss": 0.4199, "step": 40480 }, { "epoch": 4.055491561075775, "grad_norm": 1.9871585369110107, "learning_rate": 4.292198878368086e-06, "loss": 0.3935, "step": 40490 }, { "epoch": 4.056493213802774, "grad_norm": 2.012375831604004, "learning_rate": 4.2833705451885696e-06, "loss": 0.3794, "step": 40500 }, { "epoch": 4.057494866529774, "grad_norm": 2.4525644779205322, "learning_rate": 4.274550449811687e-06, "loss": 0.439, "step": 40510 }, { "epoch": 4.0584965192567735, "grad_norm": 1.6219289302825928, "learning_rate": 4.265738595744681e-06, "loss": 0.4129, "step": 40520 }, { "epoch": 4.0594981719837735, "grad_norm": 1.9297707080841064, "learning_rate": 4.2569349864915175e-06, "loss": 0.4051, "step": 40530 }, { "epoch": 4.060499824710773, "grad_norm": 2.0720765590667725, "learning_rate": 4.248139625552877e-06, "loss": 0.3447, "step": 40540 }, { "epoch": 4.061501477437773, "grad_norm": 1.5587975978851318, "learning_rate": 4.239352516426167e-06, "loss": 0.3451, "step": 40550 }, { "epoch": 4.062503130164772, "grad_norm": 2.4509339332580566, "learning_rate": 4.230573662605511e-06, "loss": 0.4148, "step": 40560 }, { "epoch": 4.063504782891771, "grad_norm": 2.0733141899108887, "learning_rate": 4.221803067581751e-06, "loss": 0.4208, "step": 40570 }, { "epoch": 4.064506435618771, "grad_norm": 1.82527494430542, "learning_rate": 4.213040734842444e-06, "loss": 0.3823, "step": 40580 }, { "epoch": 4.06550808834577, "grad_norm": 2.46215558052063, "learning_rate": 4.204286667871859e-06, "loss": 0.423, "step": 40590 }, { "epoch": 4.06650974107277, "grad_norm": 2.0969185829162598, "learning_rate": 4.1955408701509854e-06, "loss": 0.4262, "step": 40600 }, { "epoch": 4.067511393799769, "grad_norm": 1.8389031887054443, "learning_rate": 4.186803345157517e-06, "loss": 0.4392, "step": 40610 }, { "epoch": 4.068513046526769, "grad_norm": 2.8035295009613037, "learning_rate": 4.178074096365864e-06, "loss": 0.4361, "step": 40620 }, { "epoch": 4.0695146992537685, "grad_norm": 1.908402442932129, "learning_rate": 4.169353127247139e-06, "loss": 0.3687, "step": 40630 }, { "epoch": 4.0705163519807686, "grad_norm": 1.9811326265335083, "learning_rate": 4.160640441269168e-06, "loss": 0.3685, "step": 40640 }, { "epoch": 4.071518004707768, "grad_norm": 2.062203884124756, "learning_rate": 4.151936041896482e-06, "loss": 0.4121, "step": 40650 }, { "epoch": 4.072519657434768, "grad_norm": 1.943377137184143, "learning_rate": 4.143239932590312e-06, "loss": 0.4364, "step": 40660 }, { "epoch": 4.073521310161767, "grad_norm": 2.0794131755828857, "learning_rate": 4.134552116808602e-06, "loss": 0.3846, "step": 40670 }, { "epoch": 4.074522962888766, "grad_norm": 2.561751365661621, "learning_rate": 4.1258725980059865e-06, "loss": 0.4058, "step": 40680 }, { "epoch": 4.075524615615766, "grad_norm": 2.585106611251831, "learning_rate": 4.117201379633809e-06, "loss": 0.3959, "step": 40690 }, { "epoch": 4.076526268342765, "grad_norm": 1.8804863691329956, "learning_rate": 4.108538465140122e-06, "loss": 0.391, "step": 40700 }, { "epoch": 4.077527921069765, "grad_norm": 1.504548192024231, "learning_rate": 4.099883857969647e-06, "loss": 0.4662, "step": 40710 }, { "epoch": 4.078529573796764, "grad_norm": 2.0218613147735596, "learning_rate": 4.091237561563821e-06, "loss": 0.4463, "step": 40720 }, { "epoch": 4.079531226523764, "grad_norm": 1.8394734859466553, "learning_rate": 4.082599579360794e-06, "loss": 0.3977, "step": 40730 }, { "epoch": 4.080532879250764, "grad_norm": 2.5202815532684326, "learning_rate": 4.073969914795373e-06, "loss": 0.4841, "step": 40740 }, { "epoch": 4.081534531977764, "grad_norm": 2.3350720405578613, "learning_rate": 4.065348571299071e-06, "loss": 0.3872, "step": 40750 }, { "epoch": 4.082536184704763, "grad_norm": 2.0294203758239746, "learning_rate": 4.056735552300115e-06, "loss": 0.349, "step": 40760 }, { "epoch": 4.083537837431763, "grad_norm": 1.7993828058242798, "learning_rate": 4.048130861223395e-06, "loss": 0.3741, "step": 40770 }, { "epoch": 4.084539490158762, "grad_norm": 1.6386586427688599, "learning_rate": 4.0395345014904885e-06, "loss": 0.3829, "step": 40780 }, { "epoch": 4.085541142885761, "grad_norm": 2.2599360942840576, "learning_rate": 4.030946476519684e-06, "loss": 0.3841, "step": 40790 }, { "epoch": 4.086542795612761, "grad_norm": 1.9661349058151245, "learning_rate": 4.022366789725931e-06, "loss": 0.3766, "step": 40800 }, { "epoch": 4.08754444833976, "grad_norm": 2.0001981258392334, "learning_rate": 4.0137954445208876e-06, "loss": 0.4531, "step": 40810 }, { "epoch": 4.08854610106676, "grad_norm": 1.8442391157150269, "learning_rate": 4.0052324443128564e-06, "loss": 0.3898, "step": 40820 }, { "epoch": 4.0895477537937595, "grad_norm": 2.130378246307373, "learning_rate": 3.99667779250687e-06, "loss": 0.4561, "step": 40830 }, { "epoch": 4.0905494065207595, "grad_norm": 2.1687450408935547, "learning_rate": 3.98813149250461e-06, "loss": 0.4071, "step": 40840 }, { "epoch": 4.091551059247759, "grad_norm": 1.72993004322052, "learning_rate": 3.979593547704433e-06, "loss": 0.3537, "step": 40850 }, { "epoch": 4.092552711974759, "grad_norm": 1.8413400650024414, "learning_rate": 3.9710639615014e-06, "loss": 0.4079, "step": 40860 }, { "epoch": 4.093554364701758, "grad_norm": 2.123229742050171, "learning_rate": 3.962542737287226e-06, "loss": 0.3985, "step": 40870 }, { "epoch": 4.094556017428758, "grad_norm": 1.9011422395706177, "learning_rate": 3.954029878450311e-06, "loss": 0.3487, "step": 40880 }, { "epoch": 4.095557670155757, "grad_norm": 1.8646225929260254, "learning_rate": 3.9455253883757234e-06, "loss": 0.4002, "step": 40890 }, { "epoch": 4.096559322882756, "grad_norm": 2.285891056060791, "learning_rate": 3.937029270445206e-06, "loss": 0.3964, "step": 40900 }, { "epoch": 4.097560975609756, "grad_norm": 2.354170799255371, "learning_rate": 3.928541528037172e-06, "loss": 0.3985, "step": 40910 }, { "epoch": 4.098562628336755, "grad_norm": 2.1918609142303467, "learning_rate": 3.920062164526706e-06, "loss": 0.4191, "step": 40920 }, { "epoch": 4.099564281063755, "grad_norm": 2.174389362335205, "learning_rate": 3.911591183285557e-06, "loss": 0.3841, "step": 40930 }, { "epoch": 4.1005659337907545, "grad_norm": 2.802354335784912, "learning_rate": 3.903128587682147e-06, "loss": 0.4132, "step": 40940 }, { "epoch": 4.1015675865177545, "grad_norm": 1.9637892246246338, "learning_rate": 3.894674381081556e-06, "loss": 0.3879, "step": 40950 }, { "epoch": 4.102569239244754, "grad_norm": 2.0116653442382812, "learning_rate": 3.886228566845534e-06, "loss": 0.4091, "step": 40960 }, { "epoch": 4.103570891971754, "grad_norm": 2.062088966369629, "learning_rate": 3.877791148332491e-06, "loss": 0.4185, "step": 40970 }, { "epoch": 4.104572544698753, "grad_norm": 2.834785223007202, "learning_rate": 3.8693621288975e-06, "loss": 0.3921, "step": 40980 }, { "epoch": 4.105574197425753, "grad_norm": 1.9823628664016724, "learning_rate": 3.860941511892291e-06, "loss": 0.4165, "step": 40990 }, { "epoch": 4.106575850152752, "grad_norm": 2.0273165702819824, "learning_rate": 3.852529300665259e-06, "loss": 0.4596, "step": 41000 }, { "epoch": 4.107577502879751, "grad_norm": 1.7067807912826538, "learning_rate": 3.844125498561449e-06, "loss": 0.4131, "step": 41010 }, { "epoch": 4.108579155606751, "grad_norm": 2.307382345199585, "learning_rate": 3.8357301089225696e-06, "loss": 0.4735, "step": 41020 }, { "epoch": 4.10958080833375, "grad_norm": 2.346848964691162, "learning_rate": 3.827343135086978e-06, "loss": 0.4004, "step": 41030 }, { "epoch": 4.11058246106075, "grad_norm": 2.429508686065674, "learning_rate": 3.818964580389686e-06, "loss": 0.4555, "step": 41040 }, { "epoch": 4.11158411378775, "grad_norm": 2.165010929107666, "learning_rate": 3.8105944481623578e-06, "loss": 0.3992, "step": 41050 }, { "epoch": 4.11258576651475, "grad_norm": 2.037492513656616, "learning_rate": 3.802232741733325e-06, "loss": 0.4485, "step": 41060 }, { "epoch": 4.113587419241749, "grad_norm": 2.500692367553711, "learning_rate": 3.7938794644275355e-06, "loss": 0.4451, "step": 41070 }, { "epoch": 4.114589071968749, "grad_norm": 1.6784147024154663, "learning_rate": 3.7855346195666027e-06, "loss": 0.3731, "step": 41080 }, { "epoch": 4.115590724695748, "grad_norm": 2.7670488357543945, "learning_rate": 3.777198210468802e-06, "loss": 0.3972, "step": 41090 }, { "epoch": 4.116592377422748, "grad_norm": 2.1402854919433594, "learning_rate": 3.7688702404490406e-06, "loss": 0.4219, "step": 41100 }, { "epoch": 4.117594030149747, "grad_norm": 2.4333910942077637, "learning_rate": 3.760550712818847e-06, "loss": 0.3754, "step": 41110 }, { "epoch": 4.118595682876746, "grad_norm": 2.354822874069214, "learning_rate": 3.7522396308864367e-06, "loss": 0.401, "step": 41120 }, { "epoch": 4.119597335603746, "grad_norm": 1.6452901363372803, "learning_rate": 3.743936997956646e-06, "loss": 0.3948, "step": 41130 }, { "epoch": 4.1205989883307454, "grad_norm": 2.6851041316986084, "learning_rate": 3.7356428173309337e-06, "loss": 0.4247, "step": 41140 }, { "epoch": 4.1216006410577455, "grad_norm": 2.218315839767456, "learning_rate": 3.7273570923074207e-06, "loss": 0.3556, "step": 41150 }, { "epoch": 4.122602293784745, "grad_norm": 2.302386522293091, "learning_rate": 3.7190798261808657e-06, "loss": 0.3815, "step": 41160 }, { "epoch": 4.123603946511745, "grad_norm": 2.2736644744873047, "learning_rate": 3.7108110222426636e-06, "loss": 0.325, "step": 41170 }, { "epoch": 4.124605599238744, "grad_norm": 1.8379311561584473, "learning_rate": 3.7025506837808173e-06, "loss": 0.3882, "step": 41180 }, { "epoch": 4.125607251965744, "grad_norm": 1.8531453609466553, "learning_rate": 3.6942988140800023e-06, "loss": 0.382, "step": 41190 }, { "epoch": 4.126608904692743, "grad_norm": 2.105767011642456, "learning_rate": 3.686055416421508e-06, "loss": 0.4276, "step": 41200 }, { "epoch": 4.127610557419743, "grad_norm": 1.9618855714797974, "learning_rate": 3.6778204940832394e-06, "loss": 0.3956, "step": 41210 }, { "epoch": 4.128612210146742, "grad_norm": 1.891269326210022, "learning_rate": 3.6695940503397667e-06, "loss": 0.3889, "step": 41220 }, { "epoch": 4.129613862873741, "grad_norm": 1.8943488597869873, "learning_rate": 3.661376088462262e-06, "loss": 0.4319, "step": 41230 }, { "epoch": 4.130615515600741, "grad_norm": 2.5317161083221436, "learning_rate": 3.6531666117185335e-06, "loss": 0.4331, "step": 41240 }, { "epoch": 4.1316171683277405, "grad_norm": 1.882836937904358, "learning_rate": 3.644965623373012e-06, "loss": 0.4, "step": 41250 }, { "epoch": 4.1326188210547405, "grad_norm": 1.9603805541992188, "learning_rate": 3.636773126686757e-06, "loss": 0.3485, "step": 41260 }, { "epoch": 4.13362047378174, "grad_norm": 2.294837236404419, "learning_rate": 3.6285891249174504e-06, "loss": 0.4074, "step": 41270 }, { "epoch": 4.13462212650874, "grad_norm": 2.164186477661133, "learning_rate": 3.6204136213193935e-06, "loss": 0.4174, "step": 41280 }, { "epoch": 4.135623779235739, "grad_norm": 1.9150091409683228, "learning_rate": 3.6122466191435112e-06, "loss": 0.4222, "step": 41290 }, { "epoch": 4.136625431962739, "grad_norm": 2.168137550354004, "learning_rate": 3.6040881216373444e-06, "loss": 0.4368, "step": 41300 }, { "epoch": 4.137627084689738, "grad_norm": 2.411673069000244, "learning_rate": 3.5959381320450596e-06, "loss": 0.4143, "step": 41310 }, { "epoch": 4.138628737416737, "grad_norm": 1.9745694398880005, "learning_rate": 3.5877966536074283e-06, "loss": 0.3476, "step": 41320 }, { "epoch": 4.139630390143737, "grad_norm": 1.8774809837341309, "learning_rate": 3.579663689561852e-06, "loss": 0.3927, "step": 41330 }, { "epoch": 4.140632042870736, "grad_norm": 1.9254579544067383, "learning_rate": 3.5715392431423356e-06, "loss": 0.3869, "step": 41340 }, { "epoch": 4.141633695597736, "grad_norm": 2.5691428184509277, "learning_rate": 3.563423317579498e-06, "loss": 0.4281, "step": 41350 }, { "epoch": 4.142635348324736, "grad_norm": 2.32572340965271, "learning_rate": 3.5553159161005765e-06, "loss": 0.4107, "step": 41360 }, { "epoch": 4.143637001051736, "grad_norm": 2.5501484870910645, "learning_rate": 3.547217041929413e-06, "loss": 0.4378, "step": 41370 }, { "epoch": 4.144638653778735, "grad_norm": 2.0283477306365967, "learning_rate": 3.53912669828646e-06, "loss": 0.3957, "step": 41380 }, { "epoch": 4.145640306505735, "grad_norm": 2.043149709701538, "learning_rate": 3.531044888388779e-06, "loss": 0.4236, "step": 41390 }, { "epoch": 4.146641959232734, "grad_norm": 1.4444963932037354, "learning_rate": 3.5229716154500354e-06, "loss": 0.3652, "step": 41400 }, { "epoch": 4.147643611959734, "grad_norm": 2.416794776916504, "learning_rate": 3.5149068826804993e-06, "loss": 0.4204, "step": 41410 }, { "epoch": 4.148645264686733, "grad_norm": 1.7935189008712769, "learning_rate": 3.5068506932870616e-06, "loss": 0.344, "step": 41420 }, { "epoch": 4.149646917413732, "grad_norm": 2.3463051319122314, "learning_rate": 3.498803050473187e-06, "loss": 0.3847, "step": 41430 }, { "epoch": 4.150648570140732, "grad_norm": 2.427424669265747, "learning_rate": 3.490763957438953e-06, "loss": 0.4119, "step": 41440 }, { "epoch": 4.151650222867731, "grad_norm": 1.8780012130737305, "learning_rate": 3.4827334173810565e-06, "loss": 0.3799, "step": 41450 }, { "epoch": 4.1526518755947315, "grad_norm": 2.543027639389038, "learning_rate": 3.4747114334927777e-06, "loss": 0.3964, "step": 41460 }, { "epoch": 4.153653528321731, "grad_norm": 1.5053480863571167, "learning_rate": 3.4666980089639783e-06, "loss": 0.4012, "step": 41470 }, { "epoch": 4.154655181048731, "grad_norm": 1.9852502346038818, "learning_rate": 3.4586931469811373e-06, "loss": 0.3834, "step": 41480 }, { "epoch": 4.15565683377573, "grad_norm": 2.525178909301758, "learning_rate": 3.450696850727339e-06, "loss": 0.4066, "step": 41490 }, { "epoch": 4.15665848650273, "grad_norm": 2.6303272247314453, "learning_rate": 3.442709123382232e-06, "loss": 0.3694, "step": 41500 }, { "epoch": 4.157660139229729, "grad_norm": 1.9393866062164307, "learning_rate": 3.4347299681220716e-06, "loss": 0.4593, "step": 41510 }, { "epoch": 4.158661791956729, "grad_norm": 1.95452082157135, "learning_rate": 3.426759388119716e-06, "loss": 0.4154, "step": 41520 }, { "epoch": 4.159663444683728, "grad_norm": 2.5059969425201416, "learning_rate": 3.4187973865446005e-06, "loss": 0.4373, "step": 41530 }, { "epoch": 4.160665097410727, "grad_norm": 2.185824394226074, "learning_rate": 3.410843966562741e-06, "loss": 0.3905, "step": 41540 }, { "epoch": 4.161666750137727, "grad_norm": 2.5035550594329834, "learning_rate": 3.402899131336762e-06, "loss": 0.4118, "step": 41550 }, { "epoch": 4.1626684028647265, "grad_norm": 1.755468726158142, "learning_rate": 3.394962884025862e-06, "loss": 0.4559, "step": 41560 }, { "epoch": 4.1636700555917265, "grad_norm": 2.0787034034729004, "learning_rate": 3.387035227785826e-06, "loss": 0.4125, "step": 41570 }, { "epoch": 4.164671708318726, "grad_norm": 2.5473291873931885, "learning_rate": 3.3791161657690225e-06, "loss": 0.4438, "step": 41580 }, { "epoch": 4.165673361045726, "grad_norm": 2.753129243850708, "learning_rate": 3.3712057011244023e-06, "loss": 0.4606, "step": 41590 }, { "epoch": 4.166675013772725, "grad_norm": 2.3650059700012207, "learning_rate": 3.363303836997503e-06, "loss": 0.3975, "step": 41600 }, { "epoch": 4.167676666499725, "grad_norm": 2.2438313961029053, "learning_rate": 3.3554105765304323e-06, "loss": 0.4134, "step": 41610 }, { "epoch": 4.168678319226724, "grad_norm": 2.5630788803100586, "learning_rate": 3.3475259228618856e-06, "loss": 0.3901, "step": 41620 }, { "epoch": 4.169679971953723, "grad_norm": 1.7865986824035645, "learning_rate": 3.3396498791271324e-06, "loss": 0.431, "step": 41630 }, { "epoch": 4.170681624680723, "grad_norm": 2.134347915649414, "learning_rate": 3.3317824484580177e-06, "loss": 0.4261, "step": 41640 }, { "epoch": 4.171683277407722, "grad_norm": 1.7214415073394775, "learning_rate": 3.3239236339829645e-06, "loss": 0.4616, "step": 41650 }, { "epoch": 4.172684930134722, "grad_norm": 1.7558671236038208, "learning_rate": 3.3160734388269666e-06, "loss": 0.4491, "step": 41660 }, { "epoch": 4.173686582861722, "grad_norm": 2.4053850173950195, "learning_rate": 3.308231866111591e-06, "loss": 0.415, "step": 41670 }, { "epoch": 4.174688235588722, "grad_norm": 2.2763030529022217, "learning_rate": 3.300398918954978e-06, "loss": 0.4713, "step": 41680 }, { "epoch": 4.175689888315721, "grad_norm": 2.562437057495117, "learning_rate": 3.2925746004718344e-06, "loss": 0.4066, "step": 41690 }, { "epoch": 4.176691541042721, "grad_norm": 1.9022046327590942, "learning_rate": 3.284758913773442e-06, "loss": 0.369, "step": 41700 }, { "epoch": 4.17769319376972, "grad_norm": 1.930850625038147, "learning_rate": 3.276951861967642e-06, "loss": 0.4493, "step": 41710 }, { "epoch": 4.17869484649672, "grad_norm": 2.1526312828063965, "learning_rate": 3.269153448158846e-06, "loss": 0.4159, "step": 41720 }, { "epoch": 4.179696499223719, "grad_norm": 2.301459550857544, "learning_rate": 3.261363675448037e-06, "loss": 0.4742, "step": 41730 }, { "epoch": 4.180698151950718, "grad_norm": 2.6906955242156982, "learning_rate": 3.253582546932746e-06, "loss": 0.4189, "step": 41740 }, { "epoch": 4.181699804677718, "grad_norm": 1.7854713201522827, "learning_rate": 3.2458100657070916e-06, "loss": 0.4461, "step": 41750 }, { "epoch": 4.182701457404717, "grad_norm": 2.6074023246765137, "learning_rate": 3.2380462348617272e-06, "loss": 0.4518, "step": 41760 }, { "epoch": 4.1837031101317175, "grad_norm": 2.636751174926758, "learning_rate": 3.230291057483875e-06, "loss": 0.4189, "step": 41770 }, { "epoch": 4.184704762858717, "grad_norm": 2.3315718173980713, "learning_rate": 3.2225445366573376e-06, "loss": 0.3592, "step": 41780 }, { "epoch": 4.185706415585717, "grad_norm": 2.527083158493042, "learning_rate": 3.214806675462442e-06, "loss": 0.4188, "step": 41790 }, { "epoch": 4.186708068312716, "grad_norm": 2.1696078777313232, "learning_rate": 3.2070774769760892e-06, "loss": 0.4505, "step": 41800 }, { "epoch": 4.187709721039716, "grad_norm": 2.4959564208984375, "learning_rate": 3.199356944271728e-06, "loss": 0.4, "step": 41810 }, { "epoch": 4.188711373766715, "grad_norm": 1.9425634145736694, "learning_rate": 3.1916450804193865e-06, "loss": 0.3721, "step": 41820 }, { "epoch": 4.189713026493715, "grad_norm": 2.789562702178955, "learning_rate": 3.1839418884856057e-06, "loss": 0.3906, "step": 41830 }, { "epoch": 4.190714679220714, "grad_norm": 2.2337393760681152, "learning_rate": 3.176247371533503e-06, "loss": 0.4394, "step": 41840 }, { "epoch": 4.191716331947713, "grad_norm": 2.4459781646728516, "learning_rate": 3.168561532622749e-06, "loss": 0.4465, "step": 41850 }, { "epoch": 4.192717984674713, "grad_norm": 2.3486621379852295, "learning_rate": 3.1608843748095585e-06, "loss": 0.3933, "step": 41860 }, { "epoch": 4.1937196374017125, "grad_norm": 2.227614402770996, "learning_rate": 3.1532159011466724e-06, "loss": 0.4286, "step": 41870 }, { "epoch": 4.1947212901287125, "grad_norm": 1.9478914737701416, "learning_rate": 3.1455561146834178e-06, "loss": 0.4396, "step": 41880 }, { "epoch": 4.195722942855712, "grad_norm": 2.1462783813476562, "learning_rate": 3.1379050184656477e-06, "loss": 0.3786, "step": 41890 }, { "epoch": 4.196724595582712, "grad_norm": 2.016328811645508, "learning_rate": 3.1302626155357423e-06, "loss": 0.462, "step": 41900 }, { "epoch": 4.197726248309711, "grad_norm": 1.9448295831680298, "learning_rate": 3.1226289089326593e-06, "loss": 0.4233, "step": 41910 }, { "epoch": 4.198727901036711, "grad_norm": 1.9269777536392212, "learning_rate": 3.115003901691871e-06, "loss": 0.3689, "step": 41920 }, { "epoch": 4.19972955376371, "grad_norm": 2.250922679901123, "learning_rate": 3.1073875968454096e-06, "loss": 0.394, "step": 41930 }, { "epoch": 4.20073120649071, "grad_norm": 2.7293055057525635, "learning_rate": 3.099779997421831e-06, "loss": 0.359, "step": 41940 }, { "epoch": 4.201732859217709, "grad_norm": 2.0588979721069336, "learning_rate": 3.0921811064462374e-06, "loss": 0.352, "step": 41950 }, { "epoch": 4.202734511944708, "grad_norm": 1.7878366708755493, "learning_rate": 3.0845909269402756e-06, "loss": 0.3826, "step": 41960 }, { "epoch": 4.203736164671708, "grad_norm": 2.4124763011932373, "learning_rate": 3.0770094619221036e-06, "loss": 0.4297, "step": 41970 }, { "epoch": 4.2047378173987076, "grad_norm": 1.2512212991714478, "learning_rate": 3.0694367144064456e-06, "loss": 0.4079, "step": 41980 }, { "epoch": 4.205739470125708, "grad_norm": 1.5953153371810913, "learning_rate": 3.061872687404538e-06, "loss": 0.3754, "step": 41990 }, { "epoch": 4.206741122852707, "grad_norm": 2.8178634643554688, "learning_rate": 3.0543173839241546e-06, "loss": 0.4456, "step": 42000 }, { "epoch": 4.207742775579707, "grad_norm": 2.2212166786193848, "learning_rate": 3.0467708069696037e-06, "loss": 0.3952, "step": 42010 }, { "epoch": 4.208744428306706, "grad_norm": 2.113723039627075, "learning_rate": 3.039232959541724e-06, "loss": 0.3944, "step": 42020 }, { "epoch": 4.209746081033706, "grad_norm": 2.7201507091522217, "learning_rate": 3.031703844637876e-06, "loss": 0.3985, "step": 42030 }, { "epoch": 4.210747733760705, "grad_norm": 2.516010284423828, "learning_rate": 3.024183465251956e-06, "loss": 0.4158, "step": 42040 }, { "epoch": 4.211749386487705, "grad_norm": 2.367647647857666, "learning_rate": 3.0166718243743785e-06, "loss": 0.3951, "step": 42050 }, { "epoch": 4.212751039214704, "grad_norm": 1.4118751287460327, "learning_rate": 3.0091689249920923e-06, "loss": 0.3785, "step": 42060 }, { "epoch": 4.213752691941703, "grad_norm": 2.2263405323028564, "learning_rate": 3.001674770088564e-06, "loss": 0.3853, "step": 42070 }, { "epoch": 4.2147543446687035, "grad_norm": 1.7431256771087646, "learning_rate": 2.9941893626437817e-06, "loss": 0.3816, "step": 42080 }, { "epoch": 4.215755997395703, "grad_norm": 1.8744088411331177, "learning_rate": 2.986712705634262e-06, "loss": 0.4145, "step": 42090 }, { "epoch": 4.216757650122703, "grad_norm": 1.928413987159729, "learning_rate": 2.979244802033032e-06, "loss": 0.3944, "step": 42100 }, { "epoch": 4.217759302849702, "grad_norm": 2.396594762802124, "learning_rate": 2.971785654809656e-06, "loss": 0.4667, "step": 42110 }, { "epoch": 4.218760955576702, "grad_norm": 2.273517608642578, "learning_rate": 2.9643352669301933e-06, "loss": 0.4142, "step": 42120 }, { "epoch": 4.219762608303701, "grad_norm": 1.8810628652572632, "learning_rate": 2.9568936413572363e-06, "loss": 0.3943, "step": 42130 }, { "epoch": 4.220764261030701, "grad_norm": 2.027733325958252, "learning_rate": 2.9494607810498786e-06, "loss": 0.4632, "step": 42140 }, { "epoch": 4.2217659137577, "grad_norm": 2.457437753677368, "learning_rate": 2.9420366889637592e-06, "loss": 0.439, "step": 42150 }, { "epoch": 4.2227675664847, "grad_norm": 2.446218252182007, "learning_rate": 2.9346213680509882e-06, "loss": 0.3905, "step": 42160 }, { "epoch": 4.223769219211699, "grad_norm": 2.112877368927002, "learning_rate": 2.927214821260213e-06, "loss": 0.3998, "step": 42170 }, { "epoch": 4.2247708719386985, "grad_norm": 2.958712577819824, "learning_rate": 2.9198170515366023e-06, "loss": 0.4534, "step": 42180 }, { "epoch": 4.2257725246656985, "grad_norm": 2.4029061794281006, "learning_rate": 2.9124280618218035e-06, "loss": 0.4766, "step": 42190 }, { "epoch": 4.226774177392698, "grad_norm": 2.6346077919006348, "learning_rate": 2.905047855053991e-06, "loss": 0.4245, "step": 42200 }, { "epoch": 4.227775830119698, "grad_norm": 2.153205156326294, "learning_rate": 2.8976764341678536e-06, "loss": 0.4395, "step": 42210 }, { "epoch": 4.228777482846697, "grad_norm": 1.5752555131912231, "learning_rate": 2.8903138020945797e-06, "loss": 0.3512, "step": 42220 }, { "epoch": 4.229779135573697, "grad_norm": 2.1180450916290283, "learning_rate": 2.882959961761847e-06, "loss": 0.4136, "step": 42230 }, { "epoch": 4.230780788300696, "grad_norm": 2.0195274353027344, "learning_rate": 2.8756149160938635e-06, "loss": 0.42, "step": 42240 }, { "epoch": 4.231782441027696, "grad_norm": 2.2286198139190674, "learning_rate": 2.8682786680113314e-06, "loss": 0.42, "step": 42250 }, { "epoch": 4.232784093754695, "grad_norm": 2.2767393589019775, "learning_rate": 2.860951220431435e-06, "loss": 0.4883, "step": 42260 }, { "epoch": 4.233785746481694, "grad_norm": 2.395681858062744, "learning_rate": 2.8536325762678895e-06, "loss": 0.4004, "step": 42270 }, { "epoch": 4.234787399208694, "grad_norm": 2.077942371368408, "learning_rate": 2.8463227384308898e-06, "loss": 0.4004, "step": 42280 }, { "epoch": 4.2357890519356935, "grad_norm": 2.383152723312378, "learning_rate": 2.8390217098271414e-06, "loss": 0.422, "step": 42290 }, { "epoch": 4.236790704662694, "grad_norm": 1.919995903968811, "learning_rate": 2.8317294933598277e-06, "loss": 0.391, "step": 42300 }, { "epoch": 4.237792357389693, "grad_norm": 2.3750290870666504, "learning_rate": 2.824446091928651e-06, "loss": 0.4244, "step": 42310 }, { "epoch": 4.238794010116693, "grad_norm": 2.0089025497436523, "learning_rate": 2.8171715084297916e-06, "loss": 0.381, "step": 42320 }, { "epoch": 4.239795662843692, "grad_norm": 2.8085126876831055, "learning_rate": 2.809905745755936e-06, "loss": 0.3842, "step": 42330 }, { "epoch": 4.240797315570692, "grad_norm": 2.5227251052856445, "learning_rate": 2.8026488067962515e-06, "loss": 0.4056, "step": 42340 }, { "epoch": 4.241798968297691, "grad_norm": 2.0635979175567627, "learning_rate": 2.7954006944364063e-06, "loss": 0.4036, "step": 42350 }, { "epoch": 4.242800621024691, "grad_norm": 2.093088150024414, "learning_rate": 2.7881614115585485e-06, "loss": 0.3837, "step": 42360 }, { "epoch": 4.24380227375169, "grad_norm": 3.117953300476074, "learning_rate": 2.780930961041328e-06, "loss": 0.414, "step": 42370 }, { "epoch": 4.244803926478689, "grad_norm": 2.0712902545928955, "learning_rate": 2.77370934575987e-06, "loss": 0.4608, "step": 42380 }, { "epoch": 4.2458055792056895, "grad_norm": 2.027517080307007, "learning_rate": 2.766496568585797e-06, "loss": 0.4196, "step": 42390 }, { "epoch": 4.246807231932689, "grad_norm": 2.0849087238311768, "learning_rate": 2.7592926323872088e-06, "loss": 0.4262, "step": 42400 }, { "epoch": 4.247808884659689, "grad_norm": 1.7223390340805054, "learning_rate": 2.7520975400286973e-06, "loss": 0.3834, "step": 42410 }, { "epoch": 4.248810537386688, "grad_norm": 2.2863972187042236, "learning_rate": 2.7449112943713272e-06, "loss": 0.4206, "step": 42420 }, { "epoch": 4.249812190113688, "grad_norm": 1.9107893705368042, "learning_rate": 2.73773389827266e-06, "loss": 0.4077, "step": 42430 }, { "epoch": 4.250813842840687, "grad_norm": 1.7265775203704834, "learning_rate": 2.730565354586723e-06, "loss": 0.4015, "step": 42440 }, { "epoch": 4.251815495567687, "grad_norm": 2.400996208190918, "learning_rate": 2.7234056661640335e-06, "loss": 0.4373, "step": 42450 }, { "epoch": 4.252817148294686, "grad_norm": 2.8759820461273193, "learning_rate": 2.7162548358515865e-06, "loss": 0.4431, "step": 42460 }, { "epoch": 4.253818801021686, "grad_norm": 2.009953737258911, "learning_rate": 2.709112866492847e-06, "loss": 0.3766, "step": 42470 }, { "epoch": 4.254820453748685, "grad_norm": 2.1563308238983154, "learning_rate": 2.7019797609277696e-06, "loss": 0.4626, "step": 42480 }, { "epoch": 4.2558221064756845, "grad_norm": 2.7688074111938477, "learning_rate": 2.69485552199277e-06, "loss": 0.4418, "step": 42490 }, { "epoch": 4.2568237592026845, "grad_norm": 2.4989662170410156, "learning_rate": 2.6877401525207458e-06, "loss": 0.39, "step": 42500 }, { "epoch": 4.257825411929684, "grad_norm": 2.268092393875122, "learning_rate": 2.6806336553410777e-06, "loss": 0.4315, "step": 42510 }, { "epoch": 4.258827064656684, "grad_norm": 2.241440773010254, "learning_rate": 2.6735360332795917e-06, "loss": 0.4064, "step": 42520 }, { "epoch": 4.259828717383683, "grad_norm": 2.1825673580169678, "learning_rate": 2.6664472891586077e-06, "loss": 0.3797, "step": 42530 }, { "epoch": 4.260830370110683, "grad_norm": 2.2951955795288086, "learning_rate": 2.659367425796916e-06, "loss": 0.4349, "step": 42540 }, { "epoch": 4.261832022837682, "grad_norm": 1.7518614530563354, "learning_rate": 2.6522964460097553e-06, "loss": 0.3917, "step": 42550 }, { "epoch": 4.262833675564682, "grad_norm": 2.920267343521118, "learning_rate": 2.645234352608847e-06, "loss": 0.412, "step": 42560 }, { "epoch": 4.263835328291681, "grad_norm": 2.29345965385437, "learning_rate": 2.638181148402383e-06, "loss": 0.3755, "step": 42570 }, { "epoch": 4.26483698101868, "grad_norm": 2.323241710662842, "learning_rate": 2.631136836195014e-06, "loss": 0.4501, "step": 42580 }, { "epoch": 4.26583863374568, "grad_norm": 2.2796177864074707, "learning_rate": 2.6241014187878417e-06, "loss": 0.3953, "step": 42590 }, { "epoch": 4.2668402864726795, "grad_norm": 2.3419172763824463, "learning_rate": 2.6170748989784583e-06, "loss": 0.3953, "step": 42600 }, { "epoch": 4.26784193919968, "grad_norm": 2.1087424755096436, "learning_rate": 2.610057279560896e-06, "loss": 0.4198, "step": 42610 }, { "epoch": 4.268843591926679, "grad_norm": 1.8118137121200562, "learning_rate": 2.603048563325661e-06, "loss": 0.4031, "step": 42620 }, { "epoch": 4.269845244653679, "grad_norm": 2.0749223232269287, "learning_rate": 2.5960487530597018e-06, "loss": 0.4355, "step": 42630 }, { "epoch": 4.270846897380678, "grad_norm": 2.66550612449646, "learning_rate": 2.5890578515464476e-06, "loss": 0.421, "step": 42640 }, { "epoch": 4.271848550107678, "grad_norm": 1.960179090499878, "learning_rate": 2.5820758615657776e-06, "loss": 0.3849, "step": 42650 }, { "epoch": 4.272850202834677, "grad_norm": 2.837324619293213, "learning_rate": 2.575102785894007e-06, "loss": 0.4429, "step": 42660 }, { "epoch": 4.273851855561677, "grad_norm": 1.9536443948745728, "learning_rate": 2.5681386273039426e-06, "loss": 0.3737, "step": 42670 }, { "epoch": 4.274853508288676, "grad_norm": 1.969543695449829, "learning_rate": 2.5611833885648194e-06, "loss": 0.4493, "step": 42680 }, { "epoch": 4.275855161015675, "grad_norm": 2.2718586921691895, "learning_rate": 2.554237072442331e-06, "loss": 0.3987, "step": 42690 }, { "epoch": 4.2768568137426755, "grad_norm": 2.5806398391723633, "learning_rate": 2.5472996816986283e-06, "loss": 0.4017, "step": 42700 }, { "epoch": 4.277858466469675, "grad_norm": 1.761518120765686, "learning_rate": 2.540371219092305e-06, "loss": 0.3936, "step": 42710 }, { "epoch": 4.278860119196675, "grad_norm": 2.4270286560058594, "learning_rate": 2.533451687378413e-06, "loss": 0.3815, "step": 42720 }, { "epoch": 4.279861771923674, "grad_norm": 2.0313477516174316, "learning_rate": 2.5265410893084485e-06, "loss": 0.4022, "step": 42730 }, { "epoch": 4.280863424650674, "grad_norm": 1.7014490365982056, "learning_rate": 2.519639427630352e-06, "loss": 0.4044, "step": 42740 }, { "epoch": 4.281865077377673, "grad_norm": 2.5806448459625244, "learning_rate": 2.51274670508852e-06, "loss": 0.4435, "step": 42750 }, { "epoch": 4.282866730104673, "grad_norm": 2.133436918258667, "learning_rate": 2.505862924423785e-06, "loss": 0.3617, "step": 42760 }, { "epoch": 4.283868382831672, "grad_norm": 2.5142359733581543, "learning_rate": 2.4989880883734273e-06, "loss": 0.4125, "step": 42770 }, { "epoch": 4.284870035558672, "grad_norm": 1.8463962078094482, "learning_rate": 2.4921221996711707e-06, "loss": 0.4152, "step": 42780 }, { "epoch": 4.285871688285671, "grad_norm": 2.2918601036071777, "learning_rate": 2.4852652610471842e-06, "loss": 0.4054, "step": 42790 }, { "epoch": 4.2868733410126705, "grad_norm": 2.54331374168396, "learning_rate": 2.4784172752280733e-06, "loss": 0.3861, "step": 42800 }, { "epoch": 4.2878749937396705, "grad_norm": 1.8478418588638306, "learning_rate": 2.471578244936881e-06, "loss": 0.4161, "step": 42810 }, { "epoch": 4.28887664646667, "grad_norm": 2.231584072113037, "learning_rate": 2.4647481728931e-06, "loss": 0.3572, "step": 42820 }, { "epoch": 4.28987829919367, "grad_norm": 2.3038971424102783, "learning_rate": 2.457927061812648e-06, "loss": 0.3729, "step": 42830 }, { "epoch": 4.290879951920669, "grad_norm": 1.7609002590179443, "learning_rate": 2.45111491440789e-06, "loss": 0.3543, "step": 42840 }, { "epoch": 4.291881604647669, "grad_norm": 2.5485479831695557, "learning_rate": 2.4443117333876194e-06, "loss": 0.358, "step": 42850 }, { "epoch": 4.292883257374668, "grad_norm": 2.667595148086548, "learning_rate": 2.437517521457061e-06, "loss": 0.3777, "step": 42860 }, { "epoch": 4.293884910101668, "grad_norm": 2.162569522857666, "learning_rate": 2.430732281317899e-06, "loss": 0.4588, "step": 42870 }, { "epoch": 4.294886562828667, "grad_norm": 2.4120516777038574, "learning_rate": 2.4239560156682105e-06, "loss": 0.4066, "step": 42880 }, { "epoch": 4.295888215555667, "grad_norm": 2.75325345993042, "learning_rate": 2.417188727202524e-06, "loss": 0.4154, "step": 42890 }, { "epoch": 4.296889868282666, "grad_norm": 1.9056499004364014, "learning_rate": 2.4104304186118088e-06, "loss": 0.4268, "step": 42900 }, { "epoch": 4.2978915210096655, "grad_norm": 2.1397392749786377, "learning_rate": 2.403681092583454e-06, "loss": 0.3723, "step": 42910 }, { "epoch": 4.298893173736666, "grad_norm": 2.5149714946746826, "learning_rate": 2.3969407518012576e-06, "loss": 0.5002, "step": 42920 }, { "epoch": 4.299894826463665, "grad_norm": 1.9185551404953003, "learning_rate": 2.390209398945478e-06, "loss": 0.4161, "step": 42930 }, { "epoch": 4.300896479190665, "grad_norm": 2.358262777328491, "learning_rate": 2.383487036692786e-06, "loss": 0.4237, "step": 42940 }, { "epoch": 4.301898131917664, "grad_norm": 1.9108012914657593, "learning_rate": 2.376773667716262e-06, "loss": 0.4023, "step": 42950 }, { "epoch": 4.302899784644664, "grad_norm": 2.148002862930298, "learning_rate": 2.3700692946854286e-06, "loss": 0.4459, "step": 42960 }, { "epoch": 4.303901437371663, "grad_norm": 1.83115816116333, "learning_rate": 2.363373920266229e-06, "loss": 0.4049, "step": 42970 }, { "epoch": 4.304903090098663, "grad_norm": 2.1353845596313477, "learning_rate": 2.356687547121034e-06, "loss": 0.4329, "step": 42980 }, { "epoch": 4.305904742825662, "grad_norm": 2.241957902908325, "learning_rate": 2.350010177908604e-06, "loss": 0.3842, "step": 42990 }, { "epoch": 4.306906395552662, "grad_norm": 2.3134796619415283, "learning_rate": 2.343341815284164e-06, "loss": 0.4726, "step": 43000 }, { "epoch": 4.3079080482796615, "grad_norm": 1.6615601778030396, "learning_rate": 2.336682461899328e-06, "loss": 0.4071, "step": 43010 }, { "epoch": 4.308909701006661, "grad_norm": 1.741769552230835, "learning_rate": 2.3300321204021257e-06, "loss": 0.331, "step": 43020 }, { "epoch": 4.309911353733661, "grad_norm": 2.2427818775177, "learning_rate": 2.323390793437022e-06, "loss": 0.386, "step": 43030 }, { "epoch": 4.31091300646066, "grad_norm": 3.0531322956085205, "learning_rate": 2.3167584836448875e-06, "loss": 0.3896, "step": 43040 }, { "epoch": 4.31191465918766, "grad_norm": 1.993509292602539, "learning_rate": 2.3101351936630047e-06, "loss": 0.3983, "step": 43050 }, { "epoch": 4.312916311914659, "grad_norm": 2.2616026401519775, "learning_rate": 2.3035209261250716e-06, "loss": 0.4106, "step": 43060 }, { "epoch": 4.313917964641659, "grad_norm": 2.288247585296631, "learning_rate": 2.296915683661202e-06, "loss": 0.3551, "step": 43070 }, { "epoch": 4.314919617368658, "grad_norm": 1.8383212089538574, "learning_rate": 2.290319468897917e-06, "loss": 0.4036, "step": 43080 }, { "epoch": 4.315921270095658, "grad_norm": 2.345820665359497, "learning_rate": 2.2837322844581454e-06, "loss": 0.4122, "step": 43090 }, { "epoch": 4.316922922822657, "grad_norm": 1.7840343713760376, "learning_rate": 2.2771541329612317e-06, "loss": 0.3799, "step": 43100 }, { "epoch": 4.317924575549657, "grad_norm": 1.8226646184921265, "learning_rate": 2.2705850170229246e-06, "loss": 0.4293, "step": 43110 }, { "epoch": 4.3189262282766565, "grad_norm": 1.8495908975601196, "learning_rate": 2.2640249392553823e-06, "loss": 0.3918, "step": 43120 }, { "epoch": 4.319927881003656, "grad_norm": 2.4388835430145264, "learning_rate": 2.257473902267165e-06, "loss": 0.4003, "step": 43130 }, { "epoch": 4.320929533730656, "grad_norm": 1.8674567937850952, "learning_rate": 2.2509319086632425e-06, "loss": 0.381, "step": 43140 }, { "epoch": 4.321931186457655, "grad_norm": 1.9195691347122192, "learning_rate": 2.2443989610449855e-06, "loss": 0.383, "step": 43150 }, { "epoch": 4.322932839184655, "grad_norm": 2.5186662673950195, "learning_rate": 2.2378750620101667e-06, "loss": 0.3618, "step": 43160 }, { "epoch": 4.323934491911654, "grad_norm": 1.8908497095108032, "learning_rate": 2.2313602141529668e-06, "loss": 0.4159, "step": 43170 }, { "epoch": 4.324936144638654, "grad_norm": 2.380554437637329, "learning_rate": 2.22485442006396e-06, "loss": 0.4289, "step": 43180 }, { "epoch": 4.325937797365653, "grad_norm": 2.3493990898132324, "learning_rate": 2.218357682330119e-06, "loss": 0.4513, "step": 43190 }, { "epoch": 4.326939450092653, "grad_norm": 2.776486396789551, "learning_rate": 2.2118700035348328e-06, "loss": 0.3659, "step": 43200 }, { "epoch": 4.327941102819652, "grad_norm": 2.014625310897827, "learning_rate": 2.2053913862578656e-06, "loss": 0.3878, "step": 43210 }, { "epoch": 4.328942755546652, "grad_norm": 2.328507423400879, "learning_rate": 2.198921833075385e-06, "loss": 0.4097, "step": 43220 }, { "epoch": 4.329944408273652, "grad_norm": 2.105379104614258, "learning_rate": 2.192461346559968e-06, "loss": 0.3963, "step": 43230 }, { "epoch": 4.330946061000651, "grad_norm": 2.268242120742798, "learning_rate": 2.1860099292805664e-06, "loss": 0.4335, "step": 43240 }, { "epoch": 4.331947713727651, "grad_norm": 1.6855250597000122, "learning_rate": 2.1795675838025333e-06, "loss": 0.3711, "step": 43250 }, { "epoch": 4.33294936645465, "grad_norm": 2.5439865589141846, "learning_rate": 2.1731343126876276e-06, "loss": 0.363, "step": 43260 }, { "epoch": 4.33395101918165, "grad_norm": 1.8600542545318604, "learning_rate": 2.1667101184939837e-06, "loss": 0.3719, "step": 43270 }, { "epoch": 4.334952671908649, "grad_norm": 2.798398494720459, "learning_rate": 2.160295003776125e-06, "loss": 0.3651, "step": 43280 }, { "epoch": 4.335954324635649, "grad_norm": 1.7904213666915894, "learning_rate": 2.153888971084969e-06, "loss": 0.4151, "step": 43290 }, { "epoch": 4.336955977362648, "grad_norm": 2.4006080627441406, "learning_rate": 2.1474920229678396e-06, "loss": 0.4356, "step": 43300 }, { "epoch": 4.337957630089648, "grad_norm": 2.7240309715270996, "learning_rate": 2.1411041619684186e-06, "loss": 0.4338, "step": 43310 }, { "epoch": 4.3389592828166474, "grad_norm": 1.9607129096984863, "learning_rate": 2.134725390626785e-06, "loss": 0.3561, "step": 43320 }, { "epoch": 4.3399609355436475, "grad_norm": 2.6184070110321045, "learning_rate": 2.1283557114794183e-06, "loss": 0.4381, "step": 43330 }, { "epoch": 4.340962588270647, "grad_norm": 1.8792089223861694, "learning_rate": 2.1219951270591703e-06, "loss": 0.3831, "step": 43340 }, { "epoch": 4.341964240997646, "grad_norm": 1.6819710731506348, "learning_rate": 2.1156436398952623e-06, "loss": 0.3814, "step": 43350 }, { "epoch": 4.342965893724646, "grad_norm": 2.5652620792388916, "learning_rate": 2.1093012525133237e-06, "loss": 0.4009, "step": 43360 }, { "epoch": 4.343967546451645, "grad_norm": 1.6315181255340576, "learning_rate": 2.102967967435354e-06, "loss": 0.4335, "step": 43370 }, { "epoch": 4.344969199178645, "grad_norm": 2.178917407989502, "learning_rate": 2.0966437871797333e-06, "loss": 0.4018, "step": 43380 }, { "epoch": 4.345970851905644, "grad_norm": 1.893080711364746, "learning_rate": 2.0903287142612193e-06, "loss": 0.3939, "step": 43390 }, { "epoch": 4.346972504632644, "grad_norm": 2.0967724323272705, "learning_rate": 2.0840227511909504e-06, "loss": 0.4255, "step": 43400 }, { "epoch": 4.347974157359643, "grad_norm": 2.593022108078003, "learning_rate": 2.077725900476446e-06, "loss": 0.3903, "step": 43410 }, { "epoch": 4.348975810086643, "grad_norm": 1.952826738357544, "learning_rate": 2.071438164621595e-06, "loss": 0.415, "step": 43420 }, { "epoch": 4.3499774628136425, "grad_norm": 1.9698874950408936, "learning_rate": 2.065159546126666e-06, "loss": 0.3708, "step": 43430 }, { "epoch": 4.350979115540642, "grad_norm": 2.352921962738037, "learning_rate": 2.0588900474883017e-06, "loss": 0.4387, "step": 43440 }, { "epoch": 4.351980768267642, "grad_norm": 2.0334842205047607, "learning_rate": 2.0526296711995184e-06, "loss": 0.4526, "step": 43450 }, { "epoch": 4.352982420994641, "grad_norm": 2.4401466846466064, "learning_rate": 2.046378419749706e-06, "loss": 0.3585, "step": 43460 }, { "epoch": 4.353984073721641, "grad_norm": 2.077836751937866, "learning_rate": 2.0401362956246256e-06, "loss": 0.4503, "step": 43470 }, { "epoch": 4.35498572644864, "grad_norm": 2.017932415008545, "learning_rate": 2.033903301306403e-06, "loss": 0.3723, "step": 43480 }, { "epoch": 4.35598737917564, "grad_norm": 2.4183194637298584, "learning_rate": 2.0276794392735444e-06, "loss": 0.4098, "step": 43490 }, { "epoch": 4.356989031902639, "grad_norm": 2.4014265537261963, "learning_rate": 2.0214647120009173e-06, "loss": 0.4118, "step": 43500 }, { "epoch": 4.357990684629639, "grad_norm": 2.41623854637146, "learning_rate": 2.015259121959759e-06, "loss": 0.4008, "step": 43510 }, { "epoch": 4.358992337356638, "grad_norm": 2.6848697662353516, "learning_rate": 2.00906267161767e-06, "loss": 0.4053, "step": 43520 }, { "epoch": 4.359993990083638, "grad_norm": 2.032064437866211, "learning_rate": 2.002875363438622e-06, "loss": 0.3978, "step": 43530 }, { "epoch": 4.360995642810638, "grad_norm": 1.95393967628479, "learning_rate": 1.9966971998829463e-06, "loss": 0.3873, "step": 43540 }, { "epoch": 4.361997295537637, "grad_norm": 2.1058743000030518, "learning_rate": 1.990528183407339e-06, "loss": 0.4167, "step": 43550 }, { "epoch": 4.362998948264637, "grad_norm": 2.3774189949035645, "learning_rate": 1.984368316464874e-06, "loss": 0.3911, "step": 43560 }, { "epoch": 4.364000600991636, "grad_norm": 1.8962682485580444, "learning_rate": 1.978217601504956e-06, "loss": 0.3933, "step": 43570 }, { "epoch": 4.365002253718636, "grad_norm": 2.469862937927246, "learning_rate": 1.972076040973372e-06, "loss": 0.4414, "step": 43580 }, { "epoch": 4.366003906445635, "grad_norm": 2.292123794555664, "learning_rate": 1.965943637312276e-06, "loss": 0.3814, "step": 43590 }, { "epoch": 4.367005559172635, "grad_norm": 2.4966185092926025, "learning_rate": 1.9598203929601573e-06, "loss": 0.4514, "step": 43600 }, { "epoch": 4.368007211899634, "grad_norm": 2.058504104614258, "learning_rate": 1.9537063103518772e-06, "loss": 0.3639, "step": 43610 }, { "epoch": 4.369008864626634, "grad_norm": 2.378732919692993, "learning_rate": 1.947601391918649e-06, "loss": 0.3669, "step": 43620 }, { "epoch": 4.370010517353633, "grad_norm": 2.036379337310791, "learning_rate": 1.9415056400880593e-06, "loss": 0.3781, "step": 43630 }, { "epoch": 4.371012170080633, "grad_norm": 2.2540555000305176, "learning_rate": 1.9354190572840187e-06, "loss": 0.4094, "step": 43640 }, { "epoch": 4.372013822807633, "grad_norm": 1.785356879234314, "learning_rate": 1.9293416459268134e-06, "loss": 0.4068, "step": 43650 }, { "epoch": 4.373015475534632, "grad_norm": 2.5779080390930176, "learning_rate": 1.9232734084330824e-06, "loss": 0.4351, "step": 43660 }, { "epoch": 4.374017128261632, "grad_norm": 1.8296396732330322, "learning_rate": 1.9172143472158122e-06, "loss": 0.3672, "step": 43670 }, { "epoch": 4.375018780988631, "grad_norm": 2.4608004093170166, "learning_rate": 1.9111644646843284e-06, "loss": 0.4167, "step": 43680 }, { "epoch": 4.376020433715631, "grad_norm": 1.9104523658752441, "learning_rate": 1.905123763244329e-06, "loss": 0.375, "step": 43690 }, { "epoch": 4.37702208644263, "grad_norm": 1.8407065868377686, "learning_rate": 1.8990922452978565e-06, "loss": 0.4115, "step": 43700 }, { "epoch": 4.37802373916963, "grad_norm": 1.947320818901062, "learning_rate": 1.8930699132432784e-06, "loss": 0.4018, "step": 43710 }, { "epoch": 4.379025391896629, "grad_norm": 2.074536085128784, "learning_rate": 1.88705676947534e-06, "loss": 0.4216, "step": 43720 }, { "epoch": 4.380027044623629, "grad_norm": 2.215348958969116, "learning_rate": 1.8810528163851177e-06, "loss": 0.436, "step": 43730 }, { "epoch": 4.3810286973506285, "grad_norm": 2.0011367797851562, "learning_rate": 1.8750580563600351e-06, "loss": 0.3784, "step": 43740 }, { "epoch": 4.382030350077628, "grad_norm": 2.45017409324646, "learning_rate": 1.8690724917838603e-06, "loss": 0.3976, "step": 43750 }, { "epoch": 4.383032002804628, "grad_norm": 2.329396963119507, "learning_rate": 1.8630961250367062e-06, "loss": 0.3553, "step": 43760 }, { "epoch": 4.384033655531627, "grad_norm": 2.6597559452056885, "learning_rate": 1.8571289584950302e-06, "loss": 0.4332, "step": 43770 }, { "epoch": 4.385035308258627, "grad_norm": 2.527765989303589, "learning_rate": 1.8511709945316146e-06, "loss": 0.4534, "step": 43780 }, { "epoch": 4.386036960985626, "grad_norm": 2.524082899093628, "learning_rate": 1.8452222355156146e-06, "loss": 0.3753, "step": 43790 }, { "epoch": 4.387038613712626, "grad_norm": 2.056520938873291, "learning_rate": 1.8392826838124961e-06, "loss": 0.4125, "step": 43800 }, { "epoch": 4.388040266439625, "grad_norm": 1.816526174545288, "learning_rate": 1.8333523417840782e-06, "loss": 0.4369, "step": 43810 }, { "epoch": 4.389041919166625, "grad_norm": 2.039801836013794, "learning_rate": 1.8274312117885135e-06, "loss": 0.3958, "step": 43820 }, { "epoch": 4.390043571893624, "grad_norm": 2.485262155532837, "learning_rate": 1.8215192961802907e-06, "loss": 0.3797, "step": 43830 }, { "epoch": 4.391045224620624, "grad_norm": 3.454650402069092, "learning_rate": 1.8156165973102379e-06, "loss": 0.3839, "step": 43840 }, { "epoch": 4.392046877347624, "grad_norm": 2.490710735321045, "learning_rate": 1.8097231175255163e-06, "loss": 0.4095, "step": 43850 }, { "epoch": 4.393048530074623, "grad_norm": 1.9275168180465698, "learning_rate": 1.803838859169621e-06, "loss": 0.3915, "step": 43860 }, { "epoch": 4.394050182801623, "grad_norm": 1.8556205034255981, "learning_rate": 1.7979638245823771e-06, "loss": 0.457, "step": 43870 }, { "epoch": 4.395051835528622, "grad_norm": 2.13421893119812, "learning_rate": 1.7920980160999496e-06, "loss": 0.4821, "step": 43880 }, { "epoch": 4.396053488255622, "grad_norm": 1.8426682949066162, "learning_rate": 1.786241436054828e-06, "loss": 0.3621, "step": 43890 }, { "epoch": 4.397055140982621, "grad_norm": 2.002091884613037, "learning_rate": 1.7803940867758384e-06, "loss": 0.4091, "step": 43900 }, { "epoch": 4.398056793709621, "grad_norm": 2.000837564468384, "learning_rate": 1.7745559705881236e-06, "loss": 0.3843, "step": 43910 }, { "epoch": 4.39905844643662, "grad_norm": 2.5199735164642334, "learning_rate": 1.7687270898131796e-06, "loss": 0.4583, "step": 43920 }, { "epoch": 4.40006009916362, "grad_norm": 2.1583092212677, "learning_rate": 1.7629074467687995e-06, "loss": 0.3926, "step": 43930 }, { "epoch": 4.401061751890619, "grad_norm": 1.873566746711731, "learning_rate": 1.7570970437691238e-06, "loss": 0.4649, "step": 43940 }, { "epoch": 4.4020634046176195, "grad_norm": 1.9938627481460571, "learning_rate": 1.7512958831246096e-06, "loss": 0.4128, "step": 43950 }, { "epoch": 4.403065057344619, "grad_norm": 2.516998529434204, "learning_rate": 1.7455039671420537e-06, "loss": 0.3994, "step": 43960 }, { "epoch": 4.404066710071618, "grad_norm": 1.9853394031524658, "learning_rate": 1.7397212981245526e-06, "loss": 0.4124, "step": 43970 }, { "epoch": 4.405068362798618, "grad_norm": 2.252943515777588, "learning_rate": 1.733947878371539e-06, "loss": 0.3939, "step": 43980 }, { "epoch": 4.406070015525617, "grad_norm": 1.9540263414382935, "learning_rate": 1.7281837101787797e-06, "loss": 0.3688, "step": 43990 }, { "epoch": 4.407071668252617, "grad_norm": 1.7471299171447754, "learning_rate": 1.722428795838335e-06, "loss": 0.3401, "step": 44000 }, { "epoch": 4.408073320979616, "grad_norm": 1.9910095930099487, "learning_rate": 1.7166831376386084e-06, "loss": 0.4205, "step": 44010 }, { "epoch": 4.409074973706616, "grad_norm": 2.40335750579834, "learning_rate": 1.7109467378643135e-06, "loss": 0.4245, "step": 44020 }, { "epoch": 4.410076626433615, "grad_norm": 2.117269515991211, "learning_rate": 1.7052195987964898e-06, "loss": 0.3946, "step": 44030 }, { "epoch": 4.411078279160615, "grad_norm": 1.9731988906860352, "learning_rate": 1.6995017227124766e-06, "loss": 0.3306, "step": 44040 }, { "epoch": 4.4120799318876145, "grad_norm": 2.510343551635742, "learning_rate": 1.6937931118859523e-06, "loss": 0.4009, "step": 44050 }, { "epoch": 4.4130815846146145, "grad_norm": 2.3110790252685547, "learning_rate": 1.6880937685869013e-06, "loss": 0.4243, "step": 44060 }, { "epoch": 4.414083237341614, "grad_norm": 2.2889299392700195, "learning_rate": 1.682403695081608e-06, "loss": 0.3852, "step": 44070 }, { "epoch": 4.415084890068613, "grad_norm": 2.237353563308716, "learning_rate": 1.6767228936326984e-06, "loss": 0.3906, "step": 44080 }, { "epoch": 4.416086542795613, "grad_norm": 2.174852132797241, "learning_rate": 1.6710513664990961e-06, "loss": 0.3828, "step": 44090 }, { "epoch": 4.417088195522612, "grad_norm": 2.5690200328826904, "learning_rate": 1.6653891159360391e-06, "loss": 0.4048, "step": 44100 }, { "epoch": 4.418089848249612, "grad_norm": 2.094517230987549, "learning_rate": 1.659736144195065e-06, "loss": 0.4225, "step": 44110 }, { "epoch": 4.419091500976611, "grad_norm": 2.416579246520996, "learning_rate": 1.654092453524045e-06, "loss": 0.4436, "step": 44120 }, { "epoch": 4.420093153703611, "grad_norm": 2.3031387329101562, "learning_rate": 1.648458046167145e-06, "loss": 0.4566, "step": 44130 }, { "epoch": 4.42109480643061, "grad_norm": 2.294541358947754, "learning_rate": 1.64283292436484e-06, "loss": 0.4522, "step": 44140 }, { "epoch": 4.42209645915761, "grad_norm": 1.5399305820465088, "learning_rate": 1.637217090353918e-06, "loss": 0.409, "step": 44150 }, { "epoch": 4.4230981118846096, "grad_norm": 2.1396875381469727, "learning_rate": 1.631610546367468e-06, "loss": 0.3919, "step": 44160 }, { "epoch": 4.42409976461161, "grad_norm": 2.2197341918945312, "learning_rate": 1.6260132946348899e-06, "loss": 0.4115, "step": 44170 }, { "epoch": 4.425101417338609, "grad_norm": 2.275446891784668, "learning_rate": 1.620425337381884e-06, "loss": 0.4239, "step": 44180 }, { "epoch": 4.426103070065608, "grad_norm": 2.164278745651245, "learning_rate": 1.6148466768304587e-06, "loss": 0.4411, "step": 44190 }, { "epoch": 4.427104722792608, "grad_norm": 2.4946508407592773, "learning_rate": 1.6092773151989204e-06, "loss": 0.4211, "step": 44200 }, { "epoch": 4.428106375519607, "grad_norm": 2.1352880001068115, "learning_rate": 1.6037172547018863e-06, "loss": 0.3815, "step": 44210 }, { "epoch": 4.429108028246607, "grad_norm": 2.4429593086242676, "learning_rate": 1.5981664975502685e-06, "loss": 0.474, "step": 44220 }, { "epoch": 4.430109680973606, "grad_norm": 1.741686463356018, "learning_rate": 1.5926250459512793e-06, "loss": 0.3641, "step": 44230 }, { "epoch": 4.431111333700606, "grad_norm": 1.501874327659607, "learning_rate": 1.5870929021084307e-06, "loss": 0.3908, "step": 44240 }, { "epoch": 4.432112986427605, "grad_norm": 2.2431552410125732, "learning_rate": 1.5815700682215439e-06, "loss": 0.4295, "step": 44250 }, { "epoch": 4.4331146391546055, "grad_norm": 2.8315837383270264, "learning_rate": 1.576056546486726e-06, "loss": 0.4185, "step": 44260 }, { "epoch": 4.434116291881605, "grad_norm": 2.2130463123321533, "learning_rate": 1.570552339096376e-06, "loss": 0.4278, "step": 44270 }, { "epoch": 4.435117944608605, "grad_norm": 2.1421968936920166, "learning_rate": 1.5650574482392183e-06, "loss": 0.4138, "step": 44280 }, { "epoch": 4.436119597335604, "grad_norm": 2.1757049560546875, "learning_rate": 1.5595718761002325e-06, "loss": 0.4759, "step": 44290 }, { "epoch": 4.437121250062603, "grad_norm": 2.8125619888305664, "learning_rate": 1.5540956248607213e-06, "loss": 0.4304, "step": 44300 }, { "epoch": 4.438122902789603, "grad_norm": 2.888702154159546, "learning_rate": 1.5486286966982677e-06, "loss": 0.4594, "step": 44310 }, { "epoch": 4.439124555516602, "grad_norm": 2.215304136276245, "learning_rate": 1.5431710937867633e-06, "loss": 0.3982, "step": 44320 }, { "epoch": 4.440126208243602, "grad_norm": 2.1499667167663574, "learning_rate": 1.537722818296372e-06, "loss": 0.421, "step": 44330 }, { "epoch": 4.441127860970601, "grad_norm": 2.082578420639038, "learning_rate": 1.53228387239355e-06, "loss": 0.4226, "step": 44340 }, { "epoch": 4.442129513697601, "grad_norm": 2.056445598602295, "learning_rate": 1.52685425824107e-06, "loss": 0.4324, "step": 44350 }, { "epoch": 4.4431311664246005, "grad_norm": 2.5467376708984375, "learning_rate": 1.5214339779979576e-06, "loss": 0.4099, "step": 44360 }, { "epoch": 4.4441328191516005, "grad_norm": 2.7463908195495605, "learning_rate": 1.5160230338195497e-06, "loss": 0.4159, "step": 44370 }, { "epoch": 4.4451344718786, "grad_norm": 2.3661184310913086, "learning_rate": 1.510621427857467e-06, "loss": 0.4228, "step": 44380 }, { "epoch": 4.4461361246056, "grad_norm": 2.045476198196411, "learning_rate": 1.5052291622596166e-06, "loss": 0.3782, "step": 44390 }, { "epoch": 4.447137777332599, "grad_norm": 2.825392007827759, "learning_rate": 1.49984623917018e-06, "loss": 0.4446, "step": 44400 }, { "epoch": 4.448139430059598, "grad_norm": 1.9934977293014526, "learning_rate": 1.4944726607296456e-06, "loss": 0.3826, "step": 44410 }, { "epoch": 4.449141082786598, "grad_norm": 2.085172414779663, "learning_rate": 1.4891084290747704e-06, "loss": 0.4111, "step": 44420 }, { "epoch": 4.450142735513597, "grad_norm": 2.056011199951172, "learning_rate": 1.4837535463385982e-06, "loss": 0.4359, "step": 44430 }, { "epoch": 4.451144388240597, "grad_norm": 2.0101840496063232, "learning_rate": 1.478408014650448e-06, "loss": 0.3443, "step": 44440 }, { "epoch": 4.452146040967596, "grad_norm": 2.3599886894226074, "learning_rate": 1.473071836135939e-06, "loss": 0.4214, "step": 44450 }, { "epoch": 4.453147693694596, "grad_norm": 2.235658884048462, "learning_rate": 1.4677450129169574e-06, "loss": 0.4071, "step": 44460 }, { "epoch": 4.4541493464215955, "grad_norm": 2.4682376384735107, "learning_rate": 1.4624275471116638e-06, "loss": 0.3985, "step": 44470 }, { "epoch": 4.455150999148596, "grad_norm": 2.524515390396118, "learning_rate": 1.4571194408345146e-06, "loss": 0.3838, "step": 44480 }, { "epoch": 4.456152651875595, "grad_norm": 2.4610915184020996, "learning_rate": 1.451820696196235e-06, "loss": 0.3972, "step": 44490 }, { "epoch": 4.457154304602594, "grad_norm": 1.954127311706543, "learning_rate": 1.4465313153038284e-06, "loss": 0.4061, "step": 44500 }, { "epoch": 4.458155957329594, "grad_norm": 1.9836078882217407, "learning_rate": 1.441251300260571e-06, "loss": 0.4063, "step": 44510 }, { "epoch": 4.459157610056593, "grad_norm": 2.0435895919799805, "learning_rate": 1.435980653166022e-06, "loss": 0.4643, "step": 44520 }, { "epoch": 4.460159262783593, "grad_norm": 2.140763998031616, "learning_rate": 1.4307193761160131e-06, "loss": 0.3666, "step": 44530 }, { "epoch": 4.461160915510592, "grad_norm": 2.599942445755005, "learning_rate": 1.4254674712026488e-06, "loss": 0.4522, "step": 44540 }, { "epoch": 4.462162568237592, "grad_norm": 1.9349007606506348, "learning_rate": 1.4202249405143032e-06, "loss": 0.3737, "step": 44550 }, { "epoch": 4.463164220964591, "grad_norm": 2.34694766998291, "learning_rate": 1.4149917861356331e-06, "loss": 0.4058, "step": 44560 }, { "epoch": 4.4641658736915915, "grad_norm": 2.036736249923706, "learning_rate": 1.4097680101475553e-06, "loss": 0.3698, "step": 44570 }, { "epoch": 4.465167526418591, "grad_norm": 2.2654614448547363, "learning_rate": 1.404553614627266e-06, "loss": 0.4287, "step": 44580 }, { "epoch": 4.466169179145591, "grad_norm": 1.855678915977478, "learning_rate": 1.399348601648226e-06, "loss": 0.3928, "step": 44590 }, { "epoch": 4.46717083187259, "grad_norm": 2.315638303756714, "learning_rate": 1.3941529732801662e-06, "loss": 0.424, "step": 44600 }, { "epoch": 4.468172484599589, "grad_norm": 2.044198989868164, "learning_rate": 1.3889667315890948e-06, "loss": 0.4122, "step": 44610 }, { "epoch": 4.469174137326589, "grad_norm": 2.004559278488159, "learning_rate": 1.3837898786372704e-06, "loss": 0.3801, "step": 44620 }, { "epoch": 4.470175790053588, "grad_norm": 1.7977226972579956, "learning_rate": 1.3786224164832302e-06, "loss": 0.3668, "step": 44630 }, { "epoch": 4.471177442780588, "grad_norm": 1.8870209455490112, "learning_rate": 1.3734643471817743e-06, "loss": 0.4103, "step": 44640 }, { "epoch": 4.472179095507587, "grad_norm": 1.5372698307037354, "learning_rate": 1.368315672783968e-06, "loss": 0.4171, "step": 44650 }, { "epoch": 4.473180748234587, "grad_norm": 2.3017232418060303, "learning_rate": 1.3631763953371402e-06, "loss": 0.3999, "step": 44660 }, { "epoch": 4.4741824009615865, "grad_norm": 2.015127182006836, "learning_rate": 1.358046516884881e-06, "loss": 0.4281, "step": 44670 }, { "epoch": 4.4751840536885865, "grad_norm": 2.389350175857544, "learning_rate": 1.3529260394670562e-06, "loss": 0.4312, "step": 44680 }, { "epoch": 4.476185706415586, "grad_norm": 2.0506579875946045, "learning_rate": 1.347814965119773e-06, "loss": 0.4292, "step": 44690 }, { "epoch": 4.477187359142585, "grad_norm": 2.1258811950683594, "learning_rate": 1.3427132958754057e-06, "loss": 0.3413, "step": 44700 }, { "epoch": 4.478189011869585, "grad_norm": 2.5169951915740967, "learning_rate": 1.3376210337626037e-06, "loss": 0.5072, "step": 44710 }, { "epoch": 4.479190664596584, "grad_norm": 2.3843328952789307, "learning_rate": 1.3325381808062641e-06, "loss": 0.4088, "step": 44720 }, { "epoch": 4.480192317323584, "grad_norm": 2.526643753051758, "learning_rate": 1.3274647390275314e-06, "loss": 0.3923, "step": 44730 }, { "epoch": 4.481193970050583, "grad_norm": 2.592135429382324, "learning_rate": 1.3224007104438273e-06, "loss": 0.4091, "step": 44740 }, { "epoch": 4.482195622777583, "grad_norm": 2.1877357959747314, "learning_rate": 1.3173460970688251e-06, "loss": 0.4152, "step": 44750 }, { "epoch": 4.483197275504582, "grad_norm": 2.017637014389038, "learning_rate": 1.3123009009124442e-06, "loss": 0.471, "step": 44760 }, { "epoch": 4.484198928231582, "grad_norm": 2.706550359725952, "learning_rate": 1.3072651239808692e-06, "loss": 0.4349, "step": 44770 }, { "epoch": 4.4852005809585815, "grad_norm": 2.889103889465332, "learning_rate": 1.3022387682765398e-06, "loss": 0.3704, "step": 44780 }, { "epoch": 4.486202233685582, "grad_norm": 2.3987677097320557, "learning_rate": 1.2972218357981458e-06, "loss": 0.3974, "step": 44790 }, { "epoch": 4.487203886412581, "grad_norm": 2.7593445777893066, "learning_rate": 1.2922143285406224e-06, "loss": 0.4262, "step": 44800 }, { "epoch": 4.48820553913958, "grad_norm": 2.0148863792419434, "learning_rate": 1.2872162484951738e-06, "loss": 0.4055, "step": 44810 }, { "epoch": 4.48920719186658, "grad_norm": 2.1033871173858643, "learning_rate": 1.2822275976492493e-06, "loss": 0.4186, "step": 44820 }, { "epoch": 4.490208844593579, "grad_norm": 2.587449312210083, "learning_rate": 1.277248377986534e-06, "loss": 0.3816, "step": 44830 }, { "epoch": 4.491210497320579, "grad_norm": 2.1474623680114746, "learning_rate": 1.2722785914869862e-06, "loss": 0.3941, "step": 44840 }, { "epoch": 4.492212150047578, "grad_norm": 2.459506034851074, "learning_rate": 1.267318240126794e-06, "loss": 0.361, "step": 44850 }, { "epoch": 4.493213802774578, "grad_norm": 2.14664363861084, "learning_rate": 1.262367325878408e-06, "loss": 0.4469, "step": 44860 }, { "epoch": 4.494215455501577, "grad_norm": 2.295531749725342, "learning_rate": 1.2574258507105168e-06, "loss": 0.5106, "step": 44870 }, { "epoch": 4.4952171082285775, "grad_norm": 2.360304594039917, "learning_rate": 1.2524938165880601e-06, "loss": 0.4323, "step": 44880 }, { "epoch": 4.496218760955577, "grad_norm": 2.631624937057495, "learning_rate": 1.2475712254722188e-06, "loss": 0.4318, "step": 44890 }, { "epoch": 4.497220413682577, "grad_norm": 1.8856874704360962, "learning_rate": 1.2426580793204246e-06, "loss": 0.4143, "step": 44900 }, { "epoch": 4.498222066409576, "grad_norm": 2.2703697681427, "learning_rate": 1.2377543800863505e-06, "loss": 0.4279, "step": 44910 }, { "epoch": 4.499223719136575, "grad_norm": 1.7943007946014404, "learning_rate": 1.2328601297199121e-06, "loss": 0.3452, "step": 44920 }, { "epoch": 4.500225371863575, "grad_norm": 1.8310256004333496, "learning_rate": 1.2279753301672691e-06, "loss": 0.4461, "step": 44930 }, { "epoch": 4.501227024590574, "grad_norm": 2.538496494293213, "learning_rate": 1.2230999833708262e-06, "loss": 0.3722, "step": 44940 }, { "epoch": 4.502228677317574, "grad_norm": 1.8670365810394287, "learning_rate": 1.218234091269224e-06, "loss": 0.3366, "step": 44950 }, { "epoch": 4.503230330044573, "grad_norm": 2.470527410507202, "learning_rate": 1.2133776557973458e-06, "loss": 0.3661, "step": 44960 }, { "epoch": 4.504231982771573, "grad_norm": 2.3969390392303467, "learning_rate": 1.208530678886316e-06, "loss": 0.3995, "step": 44970 }, { "epoch": 4.5052336354985725, "grad_norm": 1.7786046266555786, "learning_rate": 1.2036931624634928e-06, "loss": 0.3834, "step": 44980 }, { "epoch": 4.5062352882255725, "grad_norm": 1.8966312408447266, "learning_rate": 1.1988651084524794e-06, "loss": 0.4019, "step": 44990 }, { "epoch": 4.507236940952572, "grad_norm": 2.385106325149536, "learning_rate": 1.1940465187731093e-06, "loss": 0.3839, "step": 45000 }, { "epoch": 4.508238593679572, "grad_norm": 1.5654442310333252, "learning_rate": 1.1892373953414638e-06, "loss": 0.3852, "step": 45010 }, { "epoch": 4.509240246406571, "grad_norm": 2.0553817749023438, "learning_rate": 1.1844377400698437e-06, "loss": 0.4238, "step": 45020 }, { "epoch": 4.51024189913357, "grad_norm": 1.7214395999908447, "learning_rate": 1.1796475548667945e-06, "loss": 0.3799, "step": 45030 }, { "epoch": 4.51124355186057, "grad_norm": 2.3973593711853027, "learning_rate": 1.1748668416371067e-06, "loss": 0.4252, "step": 45040 }, { "epoch": 4.512245204587569, "grad_norm": 1.9291691780090332, "learning_rate": 1.1700956022817788e-06, "loss": 0.4175, "step": 45050 }, { "epoch": 4.513246857314569, "grad_norm": 2.493912696838379, "learning_rate": 1.16533383869806e-06, "loss": 0.3822, "step": 45060 }, { "epoch": 4.514248510041568, "grad_norm": 1.8617504835128784, "learning_rate": 1.1605815527794329e-06, "loss": 0.3491, "step": 45070 }, { "epoch": 4.515250162768568, "grad_norm": 2.023247003555298, "learning_rate": 1.1558387464156024e-06, "loss": 0.4175, "step": 45080 }, { "epoch": 4.5162518154955675, "grad_norm": 2.2523674964904785, "learning_rate": 1.151105421492507e-06, "loss": 0.4757, "step": 45090 }, { "epoch": 4.517253468222568, "grad_norm": 1.8958468437194824, "learning_rate": 1.1463815798923138e-06, "loss": 0.3856, "step": 45100 }, { "epoch": 4.518255120949567, "grad_norm": 1.961393117904663, "learning_rate": 1.1416672234934283e-06, "loss": 0.451, "step": 45110 }, { "epoch": 4.519256773676567, "grad_norm": 1.5756324529647827, "learning_rate": 1.1369623541704706e-06, "loss": 0.4175, "step": 45120 }, { "epoch": 4.520258426403566, "grad_norm": 1.8311176300048828, "learning_rate": 1.1322669737942908e-06, "loss": 0.4113, "step": 45130 }, { "epoch": 4.521260079130565, "grad_norm": 2.2952687740325928, "learning_rate": 1.1275810842319767e-06, "loss": 0.3718, "step": 45140 }, { "epoch": 4.522261731857565, "grad_norm": 1.6738089323043823, "learning_rate": 1.1229046873468374e-06, "loss": 0.372, "step": 45150 }, { "epoch": 4.523263384584564, "grad_norm": 2.8291242122650146, "learning_rate": 1.118237784998394e-06, "loss": 0.4321, "step": 45160 }, { "epoch": 4.524265037311564, "grad_norm": 2.192211866378784, "learning_rate": 1.1135803790424115e-06, "loss": 0.3983, "step": 45170 }, { "epoch": 4.525266690038563, "grad_norm": 2.6538708209991455, "learning_rate": 1.1089324713308674e-06, "loss": 0.3733, "step": 45180 }, { "epoch": 4.5262683427655634, "grad_norm": 2.111126661300659, "learning_rate": 1.1042940637119665e-06, "loss": 0.3748, "step": 45190 }, { "epoch": 4.527269995492563, "grad_norm": 2.8170530796051025, "learning_rate": 1.099665158030133e-06, "loss": 0.4561, "step": 45200 }, { "epoch": 4.528271648219563, "grad_norm": 1.8179787397384644, "learning_rate": 1.0950457561260174e-06, "loss": 0.3934, "step": 45210 }, { "epoch": 4.529273300946562, "grad_norm": 3.770756721496582, "learning_rate": 1.0904358598364833e-06, "loss": 0.3988, "step": 45220 }, { "epoch": 4.530274953673562, "grad_norm": 2.2152037620544434, "learning_rate": 1.085835470994623e-06, "loss": 0.386, "step": 45230 }, { "epoch": 4.531276606400561, "grad_norm": 2.2403817176818848, "learning_rate": 1.0812445914297447e-06, "loss": 0.4372, "step": 45240 }, { "epoch": 4.53227825912756, "grad_norm": 2.0329346656799316, "learning_rate": 1.0766632229673724e-06, "loss": 0.3938, "step": 45250 }, { "epoch": 4.53327991185456, "grad_norm": 2.1116416454315186, "learning_rate": 1.0720913674292509e-06, "loss": 0.4002, "step": 45260 }, { "epoch": 4.534281564581559, "grad_norm": 1.6225544214248657, "learning_rate": 1.0675290266333433e-06, "loss": 0.4409, "step": 45270 }, { "epoch": 4.535283217308559, "grad_norm": 2.520085334777832, "learning_rate": 1.0629762023938283e-06, "loss": 0.3987, "step": 45280 }, { "epoch": 4.5362848700355585, "grad_norm": 1.736752986907959, "learning_rate": 1.0584328965211e-06, "loss": 0.3976, "step": 45290 }, { "epoch": 4.5372865227625585, "grad_norm": 2.065598726272583, "learning_rate": 1.0538991108217682e-06, "loss": 0.4387, "step": 45300 }, { "epoch": 4.538288175489558, "grad_norm": 2.372802495956421, "learning_rate": 1.0493748470986554e-06, "loss": 0.4333, "step": 45310 }, { "epoch": 4.539289828216558, "grad_norm": 2.1573617458343506, "learning_rate": 1.0448601071507996e-06, "loss": 0.3845, "step": 45320 }, { "epoch": 4.540291480943557, "grad_norm": 2.4107489585876465, "learning_rate": 1.040354892773454e-06, "loss": 0.439, "step": 45330 }, { "epoch": 4.541293133670557, "grad_norm": 2.2853431701660156, "learning_rate": 1.0358592057580746e-06, "loss": 0.393, "step": 45340 }, { "epoch": 4.542294786397556, "grad_norm": 2.2682673931121826, "learning_rate": 1.0313730478923422e-06, "loss": 0.3983, "step": 45350 }, { "epoch": 4.543296439124555, "grad_norm": 1.9406390190124512, "learning_rate": 1.0268964209601328e-06, "loss": 0.3568, "step": 45360 }, { "epoch": 4.544298091851555, "grad_norm": 2.364281415939331, "learning_rate": 1.0224293267415558e-06, "loss": 0.4477, "step": 45370 }, { "epoch": 4.545299744578554, "grad_norm": 2.5253348350524902, "learning_rate": 1.0179717670129041e-06, "loss": 0.425, "step": 45380 }, { "epoch": 4.546301397305554, "grad_norm": 2.046555995941162, "learning_rate": 1.0135237435466932e-06, "loss": 0.4468, "step": 45390 }, { "epoch": 4.5473030500325535, "grad_norm": 1.9803757667541504, "learning_rate": 1.0090852581116473e-06, "loss": 0.4173, "step": 45400 }, { "epoch": 4.548304702759554, "grad_norm": 2.417950391769409, "learning_rate": 1.004656312472693e-06, "loss": 0.3773, "step": 45410 }, { "epoch": 4.549306355486553, "grad_norm": 2.228677749633789, "learning_rate": 1.0002369083909612e-06, "loss": 0.4586, "step": 45420 }, { "epoch": 4.550308008213553, "grad_norm": 2.234309196472168, "learning_rate": 9.958270476237957e-07, "loss": 0.4367, "step": 45430 }, { "epoch": 4.551309660940552, "grad_norm": 1.536161184310913, "learning_rate": 9.914267319247495e-07, "loss": 0.4218, "step": 45440 }, { "epoch": 4.552311313667552, "grad_norm": 2.13627552986145, "learning_rate": 9.870359630435616e-07, "loss": 0.397, "step": 45450 }, { "epoch": 4.553312966394551, "grad_norm": 2.022158145904541, "learning_rate": 9.826547427261913e-07, "loss": 0.4395, "step": 45460 }, { "epoch": 4.55431461912155, "grad_norm": 2.3157637119293213, "learning_rate": 9.782830727147974e-07, "loss": 0.4107, "step": 45470 }, { "epoch": 4.55531627184855, "grad_norm": 2.565974235534668, "learning_rate": 9.739209547477396e-07, "loss": 0.3774, "step": 45480 }, { "epoch": 4.556317924575549, "grad_norm": 2.3096580505371094, "learning_rate": 9.695683905595748e-07, "loss": 0.458, "step": 45490 }, { "epoch": 4.557319577302549, "grad_norm": 2.6375136375427246, "learning_rate": 9.652253818810686e-07, "loss": 0.4138, "step": 45500 }, { "epoch": 4.558321230029549, "grad_norm": 2.0477283000946045, "learning_rate": 9.608919304391895e-07, "loss": 0.4362, "step": 45510 }, { "epoch": 4.559322882756549, "grad_norm": 2.114675521850586, "learning_rate": 9.565680379570867e-07, "loss": 0.4118, "step": 45520 }, { "epoch": 4.560324535483548, "grad_norm": 1.7504016160964966, "learning_rate": 9.522537061541353e-07, "loss": 0.3752, "step": 45530 }, { "epoch": 4.561326188210547, "grad_norm": 2.3009567260742188, "learning_rate": 9.47948936745885e-07, "loss": 0.3958, "step": 45540 }, { "epoch": 4.562327840937547, "grad_norm": 2.306684732437134, "learning_rate": 9.436537314440996e-07, "loss": 0.4089, "step": 45550 }, { "epoch": 4.563329493664547, "grad_norm": 3.0576114654541016, "learning_rate": 9.393680919567299e-07, "loss": 0.4074, "step": 45560 }, { "epoch": 4.564331146391546, "grad_norm": 1.9671968221664429, "learning_rate": 9.350920199879265e-07, "loss": 0.3332, "step": 45570 }, { "epoch": 4.565332799118545, "grad_norm": 1.918282389640808, "learning_rate": 9.308255172380376e-07, "loss": 0.4116, "step": 45580 }, { "epoch": 4.566334451845545, "grad_norm": 1.908010721206665, "learning_rate": 9.265685854035977e-07, "loss": 0.3648, "step": 45590 }, { "epoch": 4.5673361045725445, "grad_norm": 1.6069321632385254, "learning_rate": 9.2232122617735e-07, "loss": 0.4405, "step": 45600 }, { "epoch": 4.5683377572995445, "grad_norm": 2.416090726852417, "learning_rate": 9.180834412482187e-07, "loss": 0.3838, "step": 45610 }, { "epoch": 4.569339410026544, "grad_norm": 1.9915950298309326, "learning_rate": 9.13855232301325e-07, "loss": 0.4142, "step": 45620 }, { "epoch": 4.570341062753544, "grad_norm": 2.693861722946167, "learning_rate": 9.096366010179852e-07, "loss": 0.4403, "step": 45630 }, { "epoch": 4.571342715480543, "grad_norm": 1.9319853782653809, "learning_rate": 9.054275490757019e-07, "loss": 0.4234, "step": 45640 }, { "epoch": 4.572344368207542, "grad_norm": 2.010544538497925, "learning_rate": 9.012280781481725e-07, "loss": 0.4433, "step": 45650 }, { "epoch": 4.573346020934542, "grad_norm": 2.1950645446777344, "learning_rate": 8.970381899052804e-07, "loss": 0.4073, "step": 45660 }, { "epoch": 4.574347673661541, "grad_norm": 1.9796150922775269, "learning_rate": 8.928578860131043e-07, "loss": 0.4217, "step": 45670 }, { "epoch": 4.575349326388541, "grad_norm": 2.374180316925049, "learning_rate": 8.886871681339087e-07, "loss": 0.3626, "step": 45680 }, { "epoch": 4.57635097911554, "grad_norm": 3.04028582572937, "learning_rate": 8.845260379261449e-07, "loss": 0.4371, "step": 45690 }, { "epoch": 4.57735263184254, "grad_norm": 2.386300563812256, "learning_rate": 8.803744970444533e-07, "loss": 0.3989, "step": 45700 }, { "epoch": 4.5783542845695395, "grad_norm": 2.734030246734619, "learning_rate": 8.762325471396632e-07, "loss": 0.4327, "step": 45710 }, { "epoch": 4.57935593729654, "grad_norm": 2.3858745098114014, "learning_rate": 8.721001898587822e-07, "loss": 0.4408, "step": 45720 }, { "epoch": 4.580357590023539, "grad_norm": 2.001619577407837, "learning_rate": 8.67977426845018e-07, "loss": 0.421, "step": 45730 }, { "epoch": 4.581359242750539, "grad_norm": 2.0118606090545654, "learning_rate": 8.638642597377483e-07, "loss": 0.442, "step": 45740 }, { "epoch": 4.582360895477538, "grad_norm": 2.007488489151001, "learning_rate": 8.597606901725397e-07, "loss": 0.4262, "step": 45750 }, { "epoch": 4.583362548204537, "grad_norm": 2.105195999145508, "learning_rate": 8.55666719781148e-07, "loss": 0.3884, "step": 45760 }, { "epoch": 4.584364200931537, "grad_norm": 1.8013733625411987, "learning_rate": 8.515823501915126e-07, "loss": 0.4489, "step": 45770 }, { "epoch": 4.585365853658536, "grad_norm": 2.1725831031799316, "learning_rate": 8.475075830277401e-07, "loss": 0.4059, "step": 45780 }, { "epoch": 4.586367506385536, "grad_norm": 1.905267357826233, "learning_rate": 8.434424199101315e-07, "loss": 0.3964, "step": 45790 }, { "epoch": 4.587369159112535, "grad_norm": 2.351881265640259, "learning_rate": 8.393868624551743e-07, "loss": 0.3971, "step": 45800 }, { "epoch": 4.588370811839535, "grad_norm": 1.9069644212722778, "learning_rate": 8.353409122755202e-07, "loss": 0.4131, "step": 45810 }, { "epoch": 4.589372464566535, "grad_norm": 2.0071794986724854, "learning_rate": 8.313045709800071e-07, "loss": 0.3534, "step": 45820 }, { "epoch": 4.590374117293535, "grad_norm": 1.9949153661727905, "learning_rate": 8.272778401736652e-07, "loss": 0.4288, "step": 45830 }, { "epoch": 4.591375770020534, "grad_norm": 2.0804128646850586, "learning_rate": 8.232607214576859e-07, "loss": 0.3968, "step": 45840 }, { "epoch": 4.592377422747534, "grad_norm": 2.215404987335205, "learning_rate": 8.192532164294414e-07, "loss": 0.4034, "step": 45850 }, { "epoch": 4.593379075474533, "grad_norm": 2.0437800884246826, "learning_rate": 8.152553266824875e-07, "loss": 0.4487, "step": 45860 }, { "epoch": 4.594380728201532, "grad_norm": 2.4165077209472656, "learning_rate": 8.112670538065553e-07, "loss": 0.3931, "step": 45870 }, { "epoch": 4.595382380928532, "grad_norm": 2.1966311931610107, "learning_rate": 8.072883993875429e-07, "loss": 0.3606, "step": 45880 }, { "epoch": 4.596384033655531, "grad_norm": 1.7312675714492798, "learning_rate": 8.033193650075349e-07, "loss": 0.4133, "step": 45890 }, { "epoch": 4.597385686382531, "grad_norm": 1.9515161514282227, "learning_rate": 7.993599522447881e-07, "loss": 0.4031, "step": 45900 }, { "epoch": 4.5983873391095305, "grad_norm": 2.2079174518585205, "learning_rate": 7.954101626737321e-07, "loss": 0.431, "step": 45910 }, { "epoch": 4.5993889918365305, "grad_norm": 2.438955783843994, "learning_rate": 7.914699978649604e-07, "loss": 0.3663, "step": 45920 }, { "epoch": 4.60039064456353, "grad_norm": 2.7964272499084473, "learning_rate": 7.875394593852559e-07, "loss": 0.4036, "step": 45930 }, { "epoch": 4.60139229729053, "grad_norm": 2.09883713722229, "learning_rate": 7.836185487975655e-07, "loss": 0.4253, "step": 45940 }, { "epoch": 4.602393950017529, "grad_norm": 2.234827995300293, "learning_rate": 7.797072676610062e-07, "loss": 0.3641, "step": 45950 }, { "epoch": 4.603395602744529, "grad_norm": 2.1803348064422607, "learning_rate": 7.75805617530867e-07, "loss": 0.38, "step": 45960 }, { "epoch": 4.604397255471528, "grad_norm": 2.11966872215271, "learning_rate": 7.719135999586125e-07, "loss": 0.402, "step": 45970 }, { "epoch": 4.605398908198527, "grad_norm": 1.670632243156433, "learning_rate": 7.680312164918657e-07, "loss": 0.356, "step": 45980 }, { "epoch": 4.606400560925527, "grad_norm": 2.887688636779785, "learning_rate": 7.641584686744308e-07, "loss": 0.4281, "step": 45990 }, { "epoch": 4.607402213652526, "grad_norm": 1.9670038223266602, "learning_rate": 7.602953580462729e-07, "loss": 0.4214, "step": 46000 }, { "epoch": 4.608403866379526, "grad_norm": 2.14485239982605, "learning_rate": 7.564418861435301e-07, "loss": 0.3554, "step": 46010 }, { "epoch": 4.6094055191065255, "grad_norm": 1.9791523218154907, "learning_rate": 7.525980544984989e-07, "loss": 0.3751, "step": 46020 }, { "epoch": 4.6104071718335256, "grad_norm": 2.46907901763916, "learning_rate": 7.487638646396539e-07, "loss": 0.4055, "step": 46030 }, { "epoch": 4.611408824560525, "grad_norm": 2.633350372314453, "learning_rate": 7.449393180916281e-07, "loss": 0.404, "step": 46040 }, { "epoch": 4.612410477287525, "grad_norm": 2.210999011993408, "learning_rate": 7.411244163752163e-07, "loss": 0.4498, "step": 46050 }, { "epoch": 4.613412130014524, "grad_norm": 2.5408952236175537, "learning_rate": 7.373191610073965e-07, "loss": 0.4118, "step": 46060 }, { "epoch": 4.614413782741524, "grad_norm": 2.2600090503692627, "learning_rate": 7.335235535012891e-07, "loss": 0.411, "step": 46070 }, { "epoch": 4.615415435468523, "grad_norm": 2.1320509910583496, "learning_rate": 7.297375953661867e-07, "loss": 0.4412, "step": 46080 }, { "epoch": 4.616417088195522, "grad_norm": 2.4685146808624268, "learning_rate": 7.259612881075517e-07, "loss": 0.4401, "step": 46090 }, { "epoch": 4.617418740922522, "grad_norm": 1.9967085123062134, "learning_rate": 7.221946332269968e-07, "loss": 0.4211, "step": 46100 }, { "epoch": 4.618420393649521, "grad_norm": 1.9377949237823486, "learning_rate": 7.184376322223019e-07, "loss": 0.3698, "step": 46110 }, { "epoch": 4.619422046376521, "grad_norm": 2.3374502658843994, "learning_rate": 7.146902865874105e-07, "loss": 0.3967, "step": 46120 }, { "epoch": 4.620423699103521, "grad_norm": 3.4399402141571045, "learning_rate": 7.10952597812431e-07, "loss": 0.3948, "step": 46130 }, { "epoch": 4.621425351830521, "grad_norm": 1.7531739473342896, "learning_rate": 7.072245673836131e-07, "loss": 0.4081, "step": 46140 }, { "epoch": 4.62242700455752, "grad_norm": 2.1112916469573975, "learning_rate": 7.03506196783385e-07, "loss": 0.3658, "step": 46150 }, { "epoch": 4.62342865728452, "grad_norm": 3.10821533203125, "learning_rate": 6.997974874903334e-07, "loss": 0.4272, "step": 46160 }, { "epoch": 4.624430310011519, "grad_norm": 2.686772346496582, "learning_rate": 6.960984409791871e-07, "loss": 0.3991, "step": 46170 }, { "epoch": 4.625431962738519, "grad_norm": 2.0568907260894775, "learning_rate": 6.924090587208415e-07, "loss": 0.4252, "step": 46180 }, { "epoch": 4.626433615465518, "grad_norm": 2.369635581970215, "learning_rate": 6.887293421823593e-07, "loss": 0.4062, "step": 46190 }, { "epoch": 4.627435268192517, "grad_norm": 2.34002947807312, "learning_rate": 6.850592928269478e-07, "loss": 0.3955, "step": 46200 }, { "epoch": 4.628436920919517, "grad_norm": 2.856238603591919, "learning_rate": 6.813989121139647e-07, "loss": 0.4216, "step": 46210 }, { "epoch": 4.6294385736465165, "grad_norm": 2.129307508468628, "learning_rate": 6.7774820149894e-07, "loss": 0.3689, "step": 46220 }, { "epoch": 4.6304402263735165, "grad_norm": 2.2219908237457275, "learning_rate": 6.741071624335459e-07, "loss": 0.4228, "step": 46230 }, { "epoch": 4.631441879100516, "grad_norm": 2.6033074855804443, "learning_rate": 6.704757963656189e-07, "loss": 0.4703, "step": 46240 }, { "epoch": 4.632443531827516, "grad_norm": 2.086947441101074, "learning_rate": 6.668541047391313e-07, "loss": 0.3758, "step": 46250 }, { "epoch": 4.633445184554515, "grad_norm": 1.4777419567108154, "learning_rate": 6.632420889942287e-07, "loss": 0.3655, "step": 46260 }, { "epoch": 4.634446837281515, "grad_norm": 2.539104461669922, "learning_rate": 6.596397505672009e-07, "loss": 0.4336, "step": 46270 }, { "epoch": 4.635448490008514, "grad_norm": 2.2465150356292725, "learning_rate": 6.560470908904798e-07, "loss": 0.4204, "step": 46280 }, { "epoch": 4.636450142735514, "grad_norm": 2.106008529663086, "learning_rate": 6.524641113926672e-07, "loss": 0.4228, "step": 46290 }, { "epoch": 4.637451795462513, "grad_norm": 3.1730639934539795, "learning_rate": 6.488908134985011e-07, "loss": 0.3906, "step": 46300 }, { "epoch": 4.638453448189512, "grad_norm": 2.27172589302063, "learning_rate": 6.453271986288812e-07, "loss": 0.3889, "step": 46310 }, { "epoch": 4.639455100916512, "grad_norm": 2.1306052207946777, "learning_rate": 6.417732682008431e-07, "loss": 0.4226, "step": 46320 }, { "epoch": 4.6404567536435115, "grad_norm": 3.0258591175079346, "learning_rate": 6.382290236275845e-07, "loss": 0.4184, "step": 46330 }, { "epoch": 4.6414584063705115, "grad_norm": 1.9228705167770386, "learning_rate": 6.346944663184418e-07, "loss": 0.3917, "step": 46340 }, { "epoch": 4.642460059097511, "grad_norm": 2.0833024978637695, "learning_rate": 6.311695976789073e-07, "loss": 0.4407, "step": 46350 }, { "epoch": 4.643461711824511, "grad_norm": 2.445340871810913, "learning_rate": 6.276544191106154e-07, "loss": 0.3477, "step": 46360 }, { "epoch": 4.64446336455151, "grad_norm": 2.1367125511169434, "learning_rate": 6.241489320113453e-07, "loss": 0.3996, "step": 46370 }, { "epoch": 4.64546501727851, "grad_norm": 2.3300392627716064, "learning_rate": 6.206531377750319e-07, "loss": 0.3829, "step": 46380 }, { "epoch": 4.646466670005509, "grad_norm": 2.5135016441345215, "learning_rate": 6.171670377917465e-07, "loss": 0.3779, "step": 46390 }, { "epoch": 4.647468322732509, "grad_norm": 2.5999858379364014, "learning_rate": 6.136906334477111e-07, "loss": 0.3559, "step": 46400 }, { "epoch": 4.648469975459508, "grad_norm": 2.109624147415161, "learning_rate": 6.102239261252862e-07, "loss": 0.3592, "step": 46410 }, { "epoch": 4.649471628186507, "grad_norm": 2.480128526687622, "learning_rate": 6.067669172029888e-07, "loss": 0.4487, "step": 46420 }, { "epoch": 4.650473280913507, "grad_norm": 2.0874390602111816, "learning_rate": 6.03319608055461e-07, "loss": 0.4079, "step": 46430 }, { "epoch": 4.651474933640507, "grad_norm": 2.6308510303497314, "learning_rate": 5.998820000535005e-07, "loss": 0.4327, "step": 46440 }, { "epoch": 4.652476586367507, "grad_norm": 1.756169319152832, "learning_rate": 5.964540945640501e-07, "loss": 0.4047, "step": 46450 }, { "epoch": 4.653478239094506, "grad_norm": 1.9640753269195557, "learning_rate": 5.930358929501834e-07, "loss": 0.4348, "step": 46460 }, { "epoch": 4.654479891821506, "grad_norm": 1.7669875621795654, "learning_rate": 5.896273965711213e-07, "loss": 0.3855, "step": 46470 }, { "epoch": 4.655481544548505, "grad_norm": 3.000284433364868, "learning_rate": 5.86228606782227e-07, "loss": 0.3811, "step": 46480 }, { "epoch": 4.656483197275505, "grad_norm": 2.0170233249664307, "learning_rate": 5.828395249350054e-07, "loss": 0.3869, "step": 46490 }, { "epoch": 4.657484850002504, "grad_norm": 2.0546176433563232, "learning_rate": 5.794601523770926e-07, "loss": 0.4393, "step": 46500 }, { "epoch": 4.658486502729504, "grad_norm": 2.225003719329834, "learning_rate": 5.76090490452269e-07, "loss": 0.4438, "step": 46510 }, { "epoch": 4.659488155456503, "grad_norm": 1.9720081090927124, "learning_rate": 5.727305405004574e-07, "loss": 0.3664, "step": 46520 }, { "epoch": 4.6604898081835024, "grad_norm": 2.1545374393463135, "learning_rate": 5.693803038577167e-07, "loss": 0.3775, "step": 46530 }, { "epoch": 4.6614914609105025, "grad_norm": 1.8879632949829102, "learning_rate": 5.660397818562341e-07, "loss": 0.4258, "step": 46540 }, { "epoch": 4.662493113637502, "grad_norm": 2.4001457691192627, "learning_rate": 5.627089758243498e-07, "loss": 0.3909, "step": 46550 }, { "epoch": 4.663494766364502, "grad_norm": 2.4746174812316895, "learning_rate": 5.593878870865294e-07, "loss": 0.4284, "step": 46560 }, { "epoch": 4.664496419091501, "grad_norm": 2.5044631958007812, "learning_rate": 5.56076516963372e-07, "loss": 0.4111, "step": 46570 }, { "epoch": 4.665498071818501, "grad_norm": 2.208523988723755, "learning_rate": 5.527748667716243e-07, "loss": 0.4011, "step": 46580 }, { "epoch": 4.6664997245455, "grad_norm": 1.9320225715637207, "learning_rate": 5.494829378241584e-07, "loss": 0.4025, "step": 46590 }, { "epoch": 4.667501377272499, "grad_norm": 2.7530622482299805, "learning_rate": 5.462007314299883e-07, "loss": 0.3827, "step": 46600 }, { "epoch": 4.668503029999499, "grad_norm": 1.7465649843215942, "learning_rate": 5.429282488942477e-07, "loss": 0.3867, "step": 46610 }, { "epoch": 4.669504682726499, "grad_norm": 1.7411669492721558, "learning_rate": 5.396654915182209e-07, "loss": 0.4054, "step": 46620 }, { "epoch": 4.670506335453498, "grad_norm": 1.8928498029708862, "learning_rate": 5.36412460599317e-07, "loss": 0.4182, "step": 46630 }, { "epoch": 4.6715079881804975, "grad_norm": 2.3412082195281982, "learning_rate": 5.33169157431071e-07, "loss": 0.4115, "step": 46640 }, { "epoch": 4.6725096409074975, "grad_norm": 2.5557522773742676, "learning_rate": 5.29935583303165e-07, "loss": 0.3666, "step": 46650 }, { "epoch": 4.673511293634497, "grad_norm": 2.372802972793579, "learning_rate": 5.267117395014009e-07, "loss": 0.4566, "step": 46660 }, { "epoch": 4.674512946361497, "grad_norm": 2.2661385536193848, "learning_rate": 5.234976273077147e-07, "loss": 0.417, "step": 46670 }, { "epoch": 4.675514599088496, "grad_norm": 2.0665102005004883, "learning_rate": 5.202932480001699e-07, "loss": 0.3484, "step": 46680 }, { "epoch": 4.676516251815496, "grad_norm": 2.0566537380218506, "learning_rate": 5.170986028529667e-07, "loss": 0.3725, "step": 46690 }, { "epoch": 4.677517904542495, "grad_norm": 1.9390207529067993, "learning_rate": 5.139136931364252e-07, "loss": 0.4094, "step": 46700 }, { "epoch": 4.678519557269494, "grad_norm": 2.6145527362823486, "learning_rate": 5.107385201170045e-07, "loss": 0.4093, "step": 46710 }, { "epoch": 4.679521209996494, "grad_norm": 1.9652702808380127, "learning_rate": 5.075730850572835e-07, "loss": 0.416, "step": 46720 }, { "epoch": 4.680522862723493, "grad_norm": 2.30545973777771, "learning_rate": 5.044173892159748e-07, "loss": 0.3941, "step": 46730 }, { "epoch": 4.681524515450493, "grad_norm": 1.984140157699585, "learning_rate": 5.012714338479135e-07, "loss": 0.4178, "step": 46740 }, { "epoch": 4.682526168177493, "grad_norm": 2.0339691638946533, "learning_rate": 4.981352202040628e-07, "loss": 0.3753, "step": 46750 }, { "epoch": 4.683527820904493, "grad_norm": 1.7999358177185059, "learning_rate": 4.95008749531517e-07, "loss": 0.3793, "step": 46760 }, { "epoch": 4.684529473631492, "grad_norm": 1.8290388584136963, "learning_rate": 4.91892023073487e-07, "loss": 0.4243, "step": 46770 }, { "epoch": 4.685531126358492, "grad_norm": 2.1667447090148926, "learning_rate": 4.887850420693202e-07, "loss": 0.4241, "step": 46780 }, { "epoch": 4.686532779085491, "grad_norm": 2.310354471206665, "learning_rate": 4.856878077544785e-07, "loss": 0.4206, "step": 46790 }, { "epoch": 4.687534431812491, "grad_norm": 2.7219927310943604, "learning_rate": 4.826003213605545e-07, "loss": 0.4295, "step": 46800 }, { "epoch": 4.68853608453949, "grad_norm": 1.945172667503357, "learning_rate": 4.795225841152579e-07, "loss": 0.4499, "step": 46810 }, { "epoch": 4.689537737266489, "grad_norm": 2.1988821029663086, "learning_rate": 4.7645459724243444e-07, "loss": 0.403, "step": 46820 }, { "epoch": 4.690539389993489, "grad_norm": 2.19480562210083, "learning_rate": 4.7339636196204184e-07, "loss": 0.3844, "step": 46830 }, { "epoch": 4.691541042720488, "grad_norm": 2.085426092147827, "learning_rate": 4.703478794901572e-07, "loss": 0.4018, "step": 46840 }, { "epoch": 4.6925426954474885, "grad_norm": 2.0894999504089355, "learning_rate": 4.673091510389943e-07, "loss": 0.3954, "step": 46850 }, { "epoch": 4.693544348174488, "grad_norm": 2.0696043968200684, "learning_rate": 4.642801778168726e-07, "loss": 0.4097, "step": 46860 }, { "epoch": 4.694546000901488, "grad_norm": 2.10793137550354, "learning_rate": 4.61260961028237e-07, "loss": 0.3324, "step": 46870 }, { "epoch": 4.695547653628487, "grad_norm": 2.361173629760742, "learning_rate": 4.582515018736633e-07, "loss": 0.4199, "step": 46880 }, { "epoch": 4.696549306355487, "grad_norm": 2.2172489166259766, "learning_rate": 4.552518015498386e-07, "loss": 0.3685, "step": 46890 }, { "epoch": 4.697550959082486, "grad_norm": 2.353889226913452, "learning_rate": 4.522618612495588e-07, "loss": 0.4178, "step": 46900 }, { "epoch": 4.698552611809486, "grad_norm": 1.8032987117767334, "learning_rate": 4.492816821617618e-07, "loss": 0.4407, "step": 46910 }, { "epoch": 4.699554264536485, "grad_norm": 2.0415968894958496, "learning_rate": 4.463112654714885e-07, "loss": 0.3827, "step": 46920 }, { "epoch": 4.700555917263484, "grad_norm": 2.6588540077209473, "learning_rate": 4.433506123598996e-07, "loss": 0.3954, "step": 46930 }, { "epoch": 4.701557569990484, "grad_norm": 2.428084373474121, "learning_rate": 4.4039972400427286e-07, "loss": 0.4364, "step": 46940 }, { "epoch": 4.7025592227174835, "grad_norm": 2.2132630348205566, "learning_rate": 4.374586015780113e-07, "loss": 0.474, "step": 46950 }, { "epoch": 4.7035608754444835, "grad_norm": 2.4782333374023438, "learning_rate": 4.3452724625062946e-07, "loss": 0.4194, "step": 46960 }, { "epoch": 4.704562528171483, "grad_norm": 1.8694998025894165, "learning_rate": 4.3160565918774767e-07, "loss": 0.379, "step": 46970 }, { "epoch": 4.705564180898483, "grad_norm": 1.9565788507461548, "learning_rate": 4.286938415511227e-07, "loss": 0.4134, "step": 46980 }, { "epoch": 4.706565833625482, "grad_norm": 2.3455045223236084, "learning_rate": 4.2579179449860896e-07, "loss": 0.3826, "step": 46990 }, { "epoch": 4.707567486352482, "grad_norm": 1.9453314542770386, "learning_rate": 4.228995191841861e-07, "loss": 0.3892, "step": 47000 }, { "epoch": 4.708569139079481, "grad_norm": 1.8712366819381714, "learning_rate": 4.200170167579426e-07, "loss": 0.4426, "step": 47010 }, { "epoch": 4.709570791806481, "grad_norm": 1.9517831802368164, "learning_rate": 4.171442883660809e-07, "loss": 0.371, "step": 47020 }, { "epoch": 4.71057244453348, "grad_norm": 2.4578795433044434, "learning_rate": 4.142813351509234e-07, "loss": 0.4289, "step": 47030 }, { "epoch": 4.711574097260479, "grad_norm": 1.4846292734146118, "learning_rate": 4.114281582508955e-07, "loss": 0.3575, "step": 47040 }, { "epoch": 4.712575749987479, "grad_norm": 2.2234723567962646, "learning_rate": 4.0858475880054537e-07, "loss": 0.4322, "step": 47050 }, { "epoch": 4.713577402714479, "grad_norm": 1.7275258302688599, "learning_rate": 4.057511379305212e-07, "loss": 0.3939, "step": 47060 }, { "epoch": 4.714579055441479, "grad_norm": 2.4854557514190674, "learning_rate": 4.029272967675968e-07, "loss": 0.4135, "step": 47070 }, { "epoch": 4.715580708168478, "grad_norm": 3.625199317932129, "learning_rate": 4.0011323643464605e-07, "loss": 0.4075, "step": 47080 }, { "epoch": 4.716582360895478, "grad_norm": 1.9963372945785522, "learning_rate": 3.9730895805066e-07, "loss": 0.4061, "step": 47090 }, { "epoch": 4.717584013622477, "grad_norm": 2.0822815895080566, "learning_rate": 3.945144627307329e-07, "loss": 0.42, "step": 47100 }, { "epoch": 4.718585666349477, "grad_norm": 1.8344260454177856, "learning_rate": 3.917297515860813e-07, "loss": 0.4285, "step": 47110 }, { "epoch": 4.719587319076476, "grad_norm": 2.9358978271484375, "learning_rate": 3.8895482572401664e-07, "loss": 0.4028, "step": 47120 }, { "epoch": 4.720588971803476, "grad_norm": 1.8024274110794067, "learning_rate": 3.861896862479675e-07, "loss": 0.3655, "step": 47130 }, { "epoch": 4.721590624530475, "grad_norm": 2.0380611419677734, "learning_rate": 3.8343433425747365e-07, "loss": 0.3879, "step": 47140 }, { "epoch": 4.722592277257474, "grad_norm": 1.7357391119003296, "learning_rate": 3.8068877084817267e-07, "loss": 0.3821, "step": 47150 }, { "epoch": 4.7235939299844745, "grad_norm": 2.1513285636901855, "learning_rate": 3.7795299711182173e-07, "loss": 0.4278, "step": 47160 }, { "epoch": 4.724595582711474, "grad_norm": 2.3541009426116943, "learning_rate": 3.752270141362729e-07, "loss": 0.4123, "step": 47170 }, { "epoch": 4.725597235438474, "grad_norm": 2.2936346530914307, "learning_rate": 3.72510823005498e-07, "loss": 0.4565, "step": 47180 }, { "epoch": 4.726598888165473, "grad_norm": 2.1213440895080566, "learning_rate": 3.6980442479956633e-07, "loss": 0.3844, "step": 47190 }, { "epoch": 4.727600540892473, "grad_norm": 2.0257651805877686, "learning_rate": 3.671078205946532e-07, "loss": 0.4299, "step": 47200 }, { "epoch": 4.728602193619472, "grad_norm": 2.968496561050415, "learning_rate": 3.644210114630481e-07, "loss": 0.4463, "step": 47210 }, { "epoch": 4.729603846346472, "grad_norm": 1.8193625211715698, "learning_rate": 3.6174399847313525e-07, "loss": 0.4118, "step": 47220 }, { "epoch": 4.730605499073471, "grad_norm": 2.025061845779419, "learning_rate": 3.5907678268940206e-07, "loss": 0.3836, "step": 47230 }, { "epoch": 4.731607151800471, "grad_norm": 1.9306206703186035, "learning_rate": 3.564193651724557e-07, "loss": 0.3626, "step": 47240 }, { "epoch": 4.73260880452747, "grad_norm": 2.3328723907470703, "learning_rate": 3.5377174697899253e-07, "loss": 0.4453, "step": 47250 }, { "epoch": 4.7336104572544695, "grad_norm": 2.4250965118408203, "learning_rate": 3.5113392916181485e-07, "loss": 0.4006, "step": 47260 }, { "epoch": 4.7346121099814695, "grad_norm": 2.66080904006958, "learning_rate": 3.485059127698309e-07, "loss": 0.4236, "step": 47270 }, { "epoch": 4.735613762708469, "grad_norm": 2.2034497261047363, "learning_rate": 3.4588769884805473e-07, "loss": 0.3963, "step": 47280 }, { "epoch": 4.736615415435469, "grad_norm": 2.1475400924682617, "learning_rate": 3.4327928843759517e-07, "loss": 0.4313, "step": 47290 }, { "epoch": 4.737617068162468, "grad_norm": 2.439648389816284, "learning_rate": 3.406806825756614e-07, "loss": 0.4619, "step": 47300 }, { "epoch": 4.738618720889468, "grad_norm": 1.9085047245025635, "learning_rate": 3.380918822955742e-07, "loss": 0.3627, "step": 47310 }, { "epoch": 4.739620373616467, "grad_norm": 2.736255168914795, "learning_rate": 3.3551288862675167e-07, "loss": 0.415, "step": 47320 }, { "epoch": 4.740622026343467, "grad_norm": 2.4652960300445557, "learning_rate": 3.329437025947013e-07, "loss": 0.4147, "step": 47330 }, { "epoch": 4.741623679070466, "grad_norm": 2.4739601612091064, "learning_rate": 3.303843252210448e-07, "loss": 0.4556, "step": 47340 }, { "epoch": 4.742625331797466, "grad_norm": 2.4230849742889404, "learning_rate": 3.278347575234986e-07, "loss": 0.3888, "step": 47350 }, { "epoch": 4.743626984524465, "grad_norm": 2.326728105545044, "learning_rate": 3.252950005158767e-07, "loss": 0.4818, "step": 47360 }, { "epoch": 4.7446286372514646, "grad_norm": 2.3454596996307373, "learning_rate": 3.2276505520809353e-07, "loss": 0.3918, "step": 47370 }, { "epoch": 4.745630289978465, "grad_norm": 2.66456937789917, "learning_rate": 3.20244922606161e-07, "loss": 0.4257, "step": 47380 }, { "epoch": 4.746631942705464, "grad_norm": 2.293207883834839, "learning_rate": 3.1773460371219144e-07, "loss": 0.4507, "step": 47390 }, { "epoch": 4.747633595432464, "grad_norm": 2.403235673904419, "learning_rate": 3.1523409952439465e-07, "loss": 0.4127, "step": 47400 }, { "epoch": 4.748635248159463, "grad_norm": 2.0613138675689697, "learning_rate": 3.1274341103706973e-07, "loss": 0.351, "step": 47410 }, { "epoch": 4.749636900886463, "grad_norm": 2.4735984802246094, "learning_rate": 3.102625392406244e-07, "loss": 0.44, "step": 47420 }, { "epoch": 4.750638553613462, "grad_norm": 1.6152466535568237, "learning_rate": 3.077914851215585e-07, "loss": 0.4251, "step": 47430 }, { "epoch": 4.751640206340462, "grad_norm": 1.6947752237319946, "learning_rate": 3.05330249662461e-07, "loss": 0.3719, "step": 47440 }, { "epoch": 4.752641859067461, "grad_norm": 2.6465158462524414, "learning_rate": 3.0287883384202965e-07, "loss": 0.4549, "step": 47450 }, { "epoch": 4.753643511794461, "grad_norm": 2.3784332275390625, "learning_rate": 3.004372386350457e-07, "loss": 0.4461, "step": 47460 }, { "epoch": 4.7546451645214605, "grad_norm": 2.1705985069274902, "learning_rate": 2.980054650123909e-07, "loss": 0.3915, "step": 47470 }, { "epoch": 4.75564681724846, "grad_norm": 2.2914929389953613, "learning_rate": 2.955835139410418e-07, "loss": 0.4398, "step": 47480 }, { "epoch": 4.75664846997546, "grad_norm": 2.312525749206543, "learning_rate": 2.9317138638406684e-07, "loss": 0.4375, "step": 47490 }, { "epoch": 4.757650122702459, "grad_norm": 1.8366079330444336, "learning_rate": 2.9076908330062937e-07, "loss": 0.3871, "step": 47500 }, { "epoch": 4.758651775429459, "grad_norm": 2.4751739501953125, "learning_rate": 2.8837660564598747e-07, "loss": 0.4007, "step": 47510 }, { "epoch": 4.759653428156458, "grad_norm": 1.7309223413467407, "learning_rate": 2.859939543714912e-07, "loss": 0.4545, "step": 47520 }, { "epoch": 4.760655080883458, "grad_norm": 2.232672929763794, "learning_rate": 2.836211304245773e-07, "loss": 0.4088, "step": 47530 }, { "epoch": 4.761656733610457, "grad_norm": 2.060687303543091, "learning_rate": 2.812581347487908e-07, "loss": 0.3797, "step": 47540 }, { "epoch": 4.762658386337456, "grad_norm": 2.090240716934204, "learning_rate": 2.789049682837469e-07, "loss": 0.4377, "step": 47550 }, { "epoch": 4.763660039064456, "grad_norm": 1.645607590675354, "learning_rate": 2.765616319651693e-07, "loss": 0.3978, "step": 47560 }, { "epoch": 4.764661691791456, "grad_norm": 1.79109525680542, "learning_rate": 2.742281267248681e-07, "loss": 0.4024, "step": 47570 }, { "epoch": 4.7656633445184555, "grad_norm": 2.5542709827423096, "learning_rate": 2.7190445349074e-07, "loss": 0.4798, "step": 47580 }, { "epoch": 4.766664997245455, "grad_norm": 2.0201637744903564, "learning_rate": 2.6959061318677645e-07, "loss": 0.4226, "step": 47590 }, { "epoch": 4.767666649972455, "grad_norm": 2.7899389266967773, "learning_rate": 2.672866067330554e-07, "loss": 0.4479, "step": 47600 }, { "epoch": 4.768668302699454, "grad_norm": 2.2652668952941895, "learning_rate": 2.649924350457522e-07, "loss": 0.4183, "step": 47610 }, { "epoch": 4.769669955426454, "grad_norm": 2.208167314529419, "learning_rate": 2.6270809903712057e-07, "loss": 0.379, "step": 47620 }, { "epoch": 4.770671608153453, "grad_norm": 2.410855293273926, "learning_rate": 2.604335996155088e-07, "loss": 0.4361, "step": 47630 }, { "epoch": 4.771673260880453, "grad_norm": 1.9786897897720337, "learning_rate": 2.5816893768535744e-07, "loss": 0.4211, "step": 47640 }, { "epoch": 4.772674913607452, "grad_norm": 2.5731799602508545, "learning_rate": 2.5591411414719046e-07, "loss": 0.3395, "step": 47650 }, { "epoch": 4.773676566334451, "grad_norm": 2.1998531818389893, "learning_rate": 2.5366912989761573e-07, "loss": 0.412, "step": 47660 }, { "epoch": 4.774678219061451, "grad_norm": 2.2904112339019775, "learning_rate": 2.5143398582933575e-07, "loss": 0.3507, "step": 47670 }, { "epoch": 4.775679871788451, "grad_norm": 2.1148123741149902, "learning_rate": 2.4920868283114243e-07, "loss": 0.4345, "step": 47680 }, { "epoch": 4.776681524515451, "grad_norm": 2.2035341262817383, "learning_rate": 2.4699322178790285e-07, "loss": 0.3879, "step": 47690 }, { "epoch": 4.77768317724245, "grad_norm": 2.235149621963501, "learning_rate": 2.4478760358057904e-07, "loss": 0.3872, "step": 47700 }, { "epoch": 4.77868482996945, "grad_norm": 2.893347978591919, "learning_rate": 2.4259182908622226e-07, "loss": 0.4255, "step": 47710 }, { "epoch": 4.779686482696449, "grad_norm": 2.0718188285827637, "learning_rate": 2.4040589917796175e-07, "loss": 0.4655, "step": 47720 }, { "epoch": 4.780688135423449, "grad_norm": 2.6403911113739014, "learning_rate": 2.382298147250106e-07, "loss": 0.3789, "step": 47730 }, { "epoch": 4.781689788150448, "grad_norm": 2.3864388465881348, "learning_rate": 2.3606357659267942e-07, "loss": 0.373, "step": 47740 }, { "epoch": 4.782691440877448, "grad_norm": 2.413670539855957, "learning_rate": 2.3390718564235137e-07, "loss": 0.4291, "step": 47750 }, { "epoch": 4.783693093604447, "grad_norm": 1.797200083732605, "learning_rate": 2.3176064273149612e-07, "loss": 0.3736, "step": 47760 }, { "epoch": 4.784694746331446, "grad_norm": 2.2276856899261475, "learning_rate": 2.2962394871367533e-07, "loss": 0.4219, "step": 47770 }, { "epoch": 4.7856963990584465, "grad_norm": 2.2116262912750244, "learning_rate": 2.2749710443852047e-07, "loss": 0.3695, "step": 47780 }, { "epoch": 4.786698051785446, "grad_norm": 2.3747339248657227, "learning_rate": 2.2538011075176059e-07, "loss": 0.4255, "step": 47790 }, { "epoch": 4.787699704512446, "grad_norm": 2.656726360321045, "learning_rate": 2.2327296849520008e-07, "loss": 0.3903, "step": 47800 }, { "epoch": 4.788701357239445, "grad_norm": 2.5891129970550537, "learning_rate": 2.2117567850672705e-07, "loss": 0.4349, "step": 47810 }, { "epoch": 4.789703009966445, "grad_norm": 2.4755101203918457, "learning_rate": 2.190882416203105e-07, "loss": 0.3904, "step": 47820 }, { "epoch": 4.790704662693444, "grad_norm": 2.4286439418792725, "learning_rate": 2.1701065866600312e-07, "loss": 0.3761, "step": 47830 }, { "epoch": 4.791706315420444, "grad_norm": 2.4156734943389893, "learning_rate": 2.149429304699413e-07, "loss": 0.4208, "step": 47840 }, { "epoch": 4.792707968147443, "grad_norm": 2.1973776817321777, "learning_rate": 2.1288505785433954e-07, "loss": 0.3716, "step": 47850 }, { "epoch": 4.793709620874443, "grad_norm": 2.4909114837646484, "learning_rate": 2.108370416374933e-07, "loss": 0.3857, "step": 47860 }, { "epoch": 4.794711273601442, "grad_norm": 2.1599183082580566, "learning_rate": 2.0879888263378167e-07, "loss": 0.4253, "step": 47870 }, { "epoch": 4.7957129263284415, "grad_norm": 1.7978217601776123, "learning_rate": 2.067705816536647e-07, "loss": 0.367, "step": 47880 }, { "epoch": 4.7967145790554415, "grad_norm": 2.390063524246216, "learning_rate": 2.0475213950367221e-07, "loss": 0.4664, "step": 47890 }, { "epoch": 4.797716231782441, "grad_norm": 2.1187353134155273, "learning_rate": 2.0274355698643166e-07, "loss": 0.4457, "step": 47900 }, { "epoch": 4.798717884509441, "grad_norm": 2.258744239807129, "learning_rate": 2.007448349006319e-07, "loss": 0.4192, "step": 47910 }, { "epoch": 4.79971953723644, "grad_norm": 2.4565742015838623, "learning_rate": 1.9875597404105383e-07, "loss": 0.4753, "step": 47920 }, { "epoch": 4.80072118996344, "grad_norm": 2.1519105434417725, "learning_rate": 1.967769751985482e-07, "loss": 0.4224, "step": 47930 }, { "epoch": 4.801722842690439, "grad_norm": 1.9558358192443848, "learning_rate": 1.9480783916005217e-07, "loss": 0.3739, "step": 47940 }, { "epoch": 4.802724495417439, "grad_norm": 2.899057388305664, "learning_rate": 1.9284856670857276e-07, "loss": 0.4697, "step": 47950 }, { "epoch": 4.803726148144438, "grad_norm": 2.3547844886779785, "learning_rate": 1.908991586232006e-07, "loss": 0.3781, "step": 47960 }, { "epoch": 4.804727800871438, "grad_norm": 2.6609480381011963, "learning_rate": 1.8895961567910737e-07, "loss": 0.388, "step": 47970 }, { "epoch": 4.805729453598437, "grad_norm": 2.470571994781494, "learning_rate": 1.8702993864752883e-07, "loss": 0.4113, "step": 47980 }, { "epoch": 4.8067311063254365, "grad_norm": 2.2260477542877197, "learning_rate": 1.8511012829578733e-07, "loss": 0.3831, "step": 47990 }, { "epoch": 4.807732759052437, "grad_norm": 1.9835573434829712, "learning_rate": 1.832001853872861e-07, "loss": 0.4476, "step": 48000 }, { "epoch": 4.808734411779436, "grad_norm": 2.08286190032959, "learning_rate": 1.8130011068149266e-07, "loss": 0.3969, "step": 48010 }, { "epoch": 4.809736064506436, "grad_norm": 2.29129958152771, "learning_rate": 1.7940990493395815e-07, "loss": 0.4017, "step": 48020 }, { "epoch": 4.810737717233435, "grad_norm": 2.6982805728912354, "learning_rate": 1.775295688963091e-07, "loss": 0.4785, "step": 48030 }, { "epoch": 4.811739369960435, "grad_norm": 2.0841140747070312, "learning_rate": 1.7565910331624468e-07, "loss": 0.4117, "step": 48040 }, { "epoch": 4.812741022687434, "grad_norm": 2.39800763130188, "learning_rate": 1.7379850893754212e-07, "loss": 0.4054, "step": 48050 }, { "epoch": 4.813742675414434, "grad_norm": 1.6849093437194824, "learning_rate": 1.719477865000513e-07, "loss": 0.4053, "step": 48060 }, { "epoch": 4.814744328141433, "grad_norm": 2.5003740787506104, "learning_rate": 1.7010693673969736e-07, "loss": 0.3939, "step": 48070 }, { "epoch": 4.815745980868433, "grad_norm": 2.4821770191192627, "learning_rate": 1.6827596038848092e-07, "loss": 0.4269, "step": 48080 }, { "epoch": 4.8167476335954325, "grad_norm": 1.7561688423156738, "learning_rate": 1.664548581744696e-07, "loss": 0.3697, "step": 48090 }, { "epoch": 4.817749286322432, "grad_norm": 2.127955436706543, "learning_rate": 1.6464363082181467e-07, "loss": 0.3987, "step": 48100 }, { "epoch": 4.818750939049432, "grad_norm": 1.8868879079818726, "learning_rate": 1.6284227905073722e-07, "loss": 0.4484, "step": 48110 }, { "epoch": 4.819752591776431, "grad_norm": 1.6073931455612183, "learning_rate": 1.6105080357752822e-07, "loss": 0.3723, "step": 48120 }, { "epoch": 4.820754244503431, "grad_norm": 2.3502357006073, "learning_rate": 1.592692051145539e-07, "loss": 0.3868, "step": 48130 }, { "epoch": 4.82175589723043, "grad_norm": 2.4274137020111084, "learning_rate": 1.5749748437025314e-07, "loss": 0.423, "step": 48140 }, { "epoch": 4.82275754995743, "grad_norm": 2.140169143676758, "learning_rate": 1.557356420491346e-07, "loss": 0.4156, "step": 48150 }, { "epoch": 4.823759202684429, "grad_norm": 2.5988030433654785, "learning_rate": 1.539836788517851e-07, "loss": 0.4077, "step": 48160 }, { "epoch": 4.824760855411429, "grad_norm": 2.1025750637054443, "learning_rate": 1.5224159547485573e-07, "loss": 0.4326, "step": 48170 }, { "epoch": 4.825762508138428, "grad_norm": 1.7013978958129883, "learning_rate": 1.505093926110729e-07, "loss": 0.396, "step": 48180 }, { "epoch": 4.826764160865428, "grad_norm": 2.384352207183838, "learning_rate": 1.4878707094923283e-07, "loss": 0.3865, "step": 48190 }, { "epoch": 4.8277658135924275, "grad_norm": 2.697436571121216, "learning_rate": 1.470746311742044e-07, "loss": 0.3775, "step": 48200 }, { "epoch": 4.828767466319427, "grad_norm": 2.4065449237823486, "learning_rate": 1.4537207396692343e-07, "loss": 0.4029, "step": 48210 }, { "epoch": 4.829769119046427, "grad_norm": 1.876576542854309, "learning_rate": 1.436794000043984e-07, "loss": 0.4067, "step": 48220 }, { "epoch": 4.830770771773426, "grad_norm": 2.1361072063446045, "learning_rate": 1.419966099597103e-07, "loss": 0.3742, "step": 48230 }, { "epoch": 4.831772424500426, "grad_norm": 2.7625808715820312, "learning_rate": 1.4032370450200728e-07, "loss": 0.4557, "step": 48240 }, { "epoch": 4.832774077227425, "grad_norm": 2.028794527053833, "learning_rate": 1.386606842965016e-07, "loss": 0.4347, "step": 48250 }, { "epoch": 4.833775729954425, "grad_norm": 2.240504026412964, "learning_rate": 1.3700755000448373e-07, "loss": 0.4257, "step": 48260 }, { "epoch": 4.834777382681424, "grad_norm": 2.295073986053467, "learning_rate": 1.3536430228331122e-07, "loss": 0.3854, "step": 48270 }, { "epoch": 4.835779035408424, "grad_norm": 2.3853859901428223, "learning_rate": 1.3373094178640576e-07, "loss": 0.3957, "step": 48280 }, { "epoch": 4.836780688135423, "grad_norm": 2.4576685428619385, "learning_rate": 1.32107469163259e-07, "loss": 0.3913, "step": 48290 }, { "epoch": 4.837782340862423, "grad_norm": 1.7085213661193848, "learning_rate": 1.3049388505943504e-07, "loss": 0.3868, "step": 48300 }, { "epoch": 4.838783993589423, "grad_norm": 2.529207706451416, "learning_rate": 1.2889019011655955e-07, "loss": 0.4383, "step": 48310 }, { "epoch": 4.839785646316422, "grad_norm": 2.305237054824829, "learning_rate": 1.2729638497233077e-07, "loss": 0.4403, "step": 48320 }, { "epoch": 4.840787299043422, "grad_norm": 1.8845841884613037, "learning_rate": 1.2571247026051392e-07, "loss": 0.3387, "step": 48330 }, { "epoch": 4.841788951770421, "grad_norm": 2.760739803314209, "learning_rate": 1.241384466109413e-07, "loss": 0.4136, "step": 48340 }, { "epoch": 4.842790604497421, "grad_norm": 2.0073342323303223, "learning_rate": 1.2257431464950396e-07, "loss": 0.4204, "step": 48350 }, { "epoch": 4.84379225722442, "grad_norm": 2.3097691535949707, "learning_rate": 1.2102007499817103e-07, "loss": 0.3863, "step": 48360 }, { "epoch": 4.84479390995142, "grad_norm": 2.4525158405303955, "learning_rate": 1.1947572827497588e-07, "loss": 0.425, "step": 48370 }, { "epoch": 4.845795562678419, "grad_norm": 1.7297872304916382, "learning_rate": 1.1794127509401065e-07, "loss": 0.3882, "step": 48380 }, { "epoch": 4.846797215405419, "grad_norm": 2.2076919078826904, "learning_rate": 1.164167160654428e-07, "loss": 0.4148, "step": 48390 }, { "epoch": 4.8477988681324184, "grad_norm": 2.070524215698242, "learning_rate": 1.1490205179549851e-07, "loss": 0.3745, "step": 48400 }, { "epoch": 4.8488005208594185, "grad_norm": 2.7982497215270996, "learning_rate": 1.1339728288647378e-07, "loss": 0.4531, "step": 48410 }, { "epoch": 4.849802173586418, "grad_norm": 1.602648377418518, "learning_rate": 1.1190240993672607e-07, "loss": 0.3816, "step": 48420 }, { "epoch": 4.850803826313417, "grad_norm": 1.842578411102295, "learning_rate": 1.1041743354067991e-07, "loss": 0.3725, "step": 48430 }, { "epoch": 4.851805479040417, "grad_norm": 2.1316118240356445, "learning_rate": 1.0894235428882682e-07, "loss": 0.4026, "step": 48440 }, { "epoch": 4.852807131767416, "grad_norm": 2.16373348236084, "learning_rate": 1.0747717276771707e-07, "loss": 0.3965, "step": 48450 }, { "epoch": 4.853808784494416, "grad_norm": 2.0309717655181885, "learning_rate": 1.0602188955996795e-07, "loss": 0.4534, "step": 48460 }, { "epoch": 4.854810437221415, "grad_norm": 1.7845313549041748, "learning_rate": 1.0457650524426654e-07, "loss": 0.4438, "step": 48470 }, { "epoch": 4.855812089948415, "grad_norm": 1.8658897876739502, "learning_rate": 1.0314102039535312e-07, "loss": 0.3743, "step": 48480 }, { "epoch": 4.856813742675414, "grad_norm": 2.0582423210144043, "learning_rate": 1.0171543558403774e-07, "loss": 0.4112, "step": 48490 }, { "epoch": 4.857815395402414, "grad_norm": 2.1272170543670654, "learning_rate": 1.0029975137719472e-07, "loss": 0.3999, "step": 48500 }, { "epoch": 4.8588170481294135, "grad_norm": 2.131007432937622, "learning_rate": 9.88939683377571e-08, "loss": 0.384, "step": 48510 }, { "epoch": 4.8598187008564135, "grad_norm": 2.36995005607605, "learning_rate": 9.749808702472774e-08, "loss": 0.3686, "step": 48520 }, { "epoch": 4.860820353583413, "grad_norm": 2.426204204559326, "learning_rate": 9.611210799316262e-08, "loss": 0.4627, "step": 48530 }, { "epoch": 4.861822006310412, "grad_norm": 1.8704348802566528, "learning_rate": 9.473603179418756e-08, "loss": 0.4085, "step": 48540 }, { "epoch": 4.862823659037412, "grad_norm": 2.140582799911499, "learning_rate": 9.336985897498706e-08, "loss": 0.3729, "step": 48550 }, { "epoch": 4.863825311764411, "grad_norm": 1.9879549741744995, "learning_rate": 9.201359007881271e-08, "loss": 0.4406, "step": 48560 }, { "epoch": 4.864826964491411, "grad_norm": 2.478294610977173, "learning_rate": 9.066722564496921e-08, "loss": 0.3528, "step": 48570 }, { "epoch": 4.86582861721841, "grad_norm": 2.1455304622650146, "learning_rate": 8.93307662088283e-08, "loss": 0.3928, "step": 48580 }, { "epoch": 4.86683026994541, "grad_norm": 2.190230369567871, "learning_rate": 8.800421230182599e-08, "loss": 0.3745, "step": 48590 }, { "epoch": 4.867831922672409, "grad_norm": 2.3964619636535645, "learning_rate": 8.668756445145421e-08, "loss": 0.4348, "step": 48600 }, { "epoch": 4.8688335753994085, "grad_norm": 2.1755192279815674, "learning_rate": 8.53808231812664e-08, "loss": 0.3612, "step": 48610 }, { "epoch": 4.869835228126409, "grad_norm": 2.1091182231903076, "learning_rate": 8.408398901087466e-08, "loss": 0.4825, "step": 48620 }, { "epoch": 4.870836880853409, "grad_norm": 2.6433656215667725, "learning_rate": 8.279706245596375e-08, "loss": 0.4309, "step": 48630 }, { "epoch": 4.871838533580408, "grad_norm": 2.1513772010803223, "learning_rate": 8.152004402826319e-08, "loss": 0.3659, "step": 48640 }, { "epoch": 4.872840186307407, "grad_norm": 2.3869216442108154, "learning_rate": 8.025293423556956e-08, "loss": 0.4321, "step": 48650 }, { "epoch": 4.873841839034407, "grad_norm": 2.6791770458221436, "learning_rate": 7.899573358174094e-08, "loss": 0.4111, "step": 48660 }, { "epoch": 4.874843491761406, "grad_norm": 2.2372169494628906, "learning_rate": 7.774844256669134e-08, "loss": 0.4593, "step": 48670 }, { "epoch": 4.875845144488406, "grad_norm": 2.1360697746276855, "learning_rate": 7.651106168639344e-08, "loss": 0.443, "step": 48680 }, { "epoch": 4.876846797215405, "grad_norm": 2.072798490524292, "learning_rate": 7.528359143288977e-08, "loss": 0.3797, "step": 48690 }, { "epoch": 4.877848449942405, "grad_norm": 2.050501823425293, "learning_rate": 7.406603229427045e-08, "loss": 0.3611, "step": 48700 }, { "epoch": 4.878850102669404, "grad_norm": 2.0490472316741943, "learning_rate": 7.285838475468431e-08, "loss": 0.4052, "step": 48710 }, { "epoch": 4.879851755396404, "grad_norm": 2.0858280658721924, "learning_rate": 7.166064929434446e-08, "loss": 0.4111, "step": 48720 }, { "epoch": 4.880853408123404, "grad_norm": 1.834701657295227, "learning_rate": 7.047282638952545e-08, "loss": 0.4064, "step": 48730 }, { "epoch": 4.881855060850404, "grad_norm": 1.9594968557357788, "learning_rate": 6.929491651255226e-08, "loss": 0.4009, "step": 48740 }, { "epoch": 4.882856713577403, "grad_norm": 1.9518842697143555, "learning_rate": 6.812692013181132e-08, "loss": 0.3914, "step": 48750 }, { "epoch": 4.883858366304402, "grad_norm": 3.005319118499756, "learning_rate": 6.6968837711745e-08, "loss": 0.4469, "step": 48760 }, { "epoch": 4.884860019031402, "grad_norm": 2.5086545944213867, "learning_rate": 6.582066971285995e-08, "loss": 0.4403, "step": 48770 }, { "epoch": 4.885861671758401, "grad_norm": 2.4275851249694824, "learning_rate": 6.468241659171315e-08, "loss": 0.4217, "step": 48780 }, { "epoch": 4.886863324485401, "grad_norm": 2.5610122680664062, "learning_rate": 6.355407880092313e-08, "loss": 0.4648, "step": 48790 }, { "epoch": 4.8878649772124, "grad_norm": 1.8257973194122314, "learning_rate": 6.24356567891643e-08, "loss": 0.4074, "step": 48800 }, { "epoch": 4.8888666299394, "grad_norm": 1.4888192415237427, "learning_rate": 6.132715100116704e-08, "loss": 0.4077, "step": 48810 }, { "epoch": 4.8898682826663995, "grad_norm": 2.0692968368530273, "learning_rate": 6.022856187772041e-08, "loss": 0.4498, "step": 48820 }, { "epoch": 4.890869935393399, "grad_norm": 1.9948792457580566, "learning_rate": 5.913988985566943e-08, "loss": 0.3604, "step": 48830 }, { "epoch": 4.891871588120399, "grad_norm": 2.2797932624816895, "learning_rate": 5.806113536791779e-08, "loss": 0.4214, "step": 48840 }, { "epoch": 4.892873240847398, "grad_norm": 1.757158637046814, "learning_rate": 5.699229884341961e-08, "loss": 0.4587, "step": 48850 }, { "epoch": 4.893874893574398, "grad_norm": 2.277294874191284, "learning_rate": 5.593338070719323e-08, "loss": 0.3885, "step": 48860 }, { "epoch": 4.894876546301397, "grad_norm": 2.3876211643218994, "learning_rate": 5.488438138030738e-08, "loss": 0.365, "step": 48870 }, { "epoch": 4.895878199028397, "grad_norm": 2.418525457382202, "learning_rate": 5.3845301279889514e-08, "loss": 0.3895, "step": 48880 }, { "epoch": 4.896879851755396, "grad_norm": 2.0426852703094482, "learning_rate": 5.2816140819120233e-08, "loss": 0.4293, "step": 48890 }, { "epoch": 4.897881504482396, "grad_norm": 2.254586696624756, "learning_rate": 5.179690040723606e-08, "loss": 0.3793, "step": 48900 }, { "epoch": 4.898883157209395, "grad_norm": 2.491412878036499, "learning_rate": 5.078758044952947e-08, "loss": 0.3904, "step": 48910 }, { "epoch": 4.899884809936395, "grad_norm": 2.3205726146698, "learning_rate": 4.978818134735164e-08, "loss": 0.3826, "step": 48920 }, { "epoch": 4.900886462663395, "grad_norm": 2.237753391265869, "learning_rate": 4.879870349810689e-08, "loss": 0.3971, "step": 48930 }, { "epoch": 4.901888115390394, "grad_norm": 1.7564687728881836, "learning_rate": 4.781914729524717e-08, "loss": 0.4064, "step": 48940 }, { "epoch": 4.902889768117394, "grad_norm": 2.1869213581085205, "learning_rate": 4.684951312828867e-08, "loss": 0.4179, "step": 48950 }, { "epoch": 4.903891420844393, "grad_norm": 1.9542244672775269, "learning_rate": 4.588980138279797e-08, "loss": 0.4201, "step": 48960 }, { "epoch": 4.904893073571393, "grad_norm": 2.3261213302612305, "learning_rate": 4.4940012440397583e-08, "loss": 0.3918, "step": 48970 }, { "epoch": 4.905894726298392, "grad_norm": 2.3012595176696777, "learning_rate": 4.400014667876318e-08, "loss": 0.4794, "step": 48980 }, { "epoch": 4.906896379025392, "grad_norm": 2.335749387741089, "learning_rate": 4.307020447162635e-08, "loss": 0.3636, "step": 48990 }, { "epoch": 4.907898031752391, "grad_norm": 2.5763540267944336, "learning_rate": 4.215018618876632e-08, "loss": 0.388, "step": 49000 }, { "epoch": 4.908899684479391, "grad_norm": 2.0762269496917725, "learning_rate": 4.124009219602654e-08, "loss": 0.4073, "step": 49010 }, { "epoch": 4.90990133720639, "grad_norm": 2.1076955795288086, "learning_rate": 4.033992285529809e-08, "loss": 0.4247, "step": 49020 }, { "epoch": 4.9109029899333905, "grad_norm": 2.1044840812683105, "learning_rate": 3.9449678524522415e-08, "loss": 0.449, "step": 49030 }, { "epoch": 4.91190464266039, "grad_norm": 2.3766860961914062, "learning_rate": 3.856935955769969e-08, "loss": 0.4182, "step": 49040 }, { "epoch": 4.912906295387389, "grad_norm": 2.158463716506958, "learning_rate": 3.769896630488323e-08, "loss": 0.4147, "step": 49050 }, { "epoch": 4.913907948114389, "grad_norm": 2.246264696121216, "learning_rate": 3.683849911217674e-08, "loss": 0.4409, "step": 49060 }, { "epoch": 4.914909600841388, "grad_norm": 1.723870873451233, "learning_rate": 3.5987958321739844e-08, "loss": 0.4226, "step": 49070 }, { "epoch": 4.915911253568388, "grad_norm": 2.071007490158081, "learning_rate": 3.514734427177979e-08, "loss": 0.5269, "step": 49080 }, { "epoch": 4.916912906295387, "grad_norm": 2.3110177516937256, "learning_rate": 3.431665729656253e-08, "loss": 0.3906, "step": 49090 }, { "epoch": 4.917914559022387, "grad_norm": 1.8781073093414307, "learning_rate": 3.349589772640715e-08, "loss": 0.3827, "step": 49100 }, { "epoch": 4.918916211749386, "grad_norm": 2.104565143585205, "learning_rate": 3.2685065887674834e-08, "loss": 0.4316, "step": 49110 }, { "epoch": 4.919917864476386, "grad_norm": 2.219674587249756, "learning_rate": 3.188416210279099e-08, "loss": 0.363, "step": 49120 }, { "epoch": 4.9209195172033855, "grad_norm": 2.114689588546753, "learning_rate": 3.1093186690228645e-08, "loss": 0.4162, "step": 49130 }, { "epoch": 4.9219211699303855, "grad_norm": 2.0361156463623047, "learning_rate": 3.031213996451121e-08, "loss": 0.4122, "step": 49140 }, { "epoch": 4.922922822657385, "grad_norm": 1.689947247505188, "learning_rate": 2.954102223621802e-08, "loss": 0.4286, "step": 49150 }, { "epoch": 4.923924475384384, "grad_norm": 1.9771041870117188, "learning_rate": 2.877983381197602e-08, "loss": 0.3823, "step": 49160 }, { "epoch": 4.924926128111384, "grad_norm": 2.4345974922180176, "learning_rate": 2.8028574994465317e-08, "loss": 0.4479, "step": 49170 }, { "epoch": 4.925927780838383, "grad_norm": 2.2519659996032715, "learning_rate": 2.728724608241917e-08, "loss": 0.3989, "step": 49180 }, { "epoch": 4.926929433565383, "grad_norm": 2.584491729736328, "learning_rate": 2.6555847370621222e-08, "loss": 0.441, "step": 49190 }, { "epoch": 4.927931086292382, "grad_norm": 2.2947561740875244, "learning_rate": 2.5834379149905495e-08, "loss": 0.4375, "step": 49200 }, { "epoch": 4.928932739019382, "grad_norm": 2.1762804985046387, "learning_rate": 2.5122841707159172e-08, "loss": 0.3976, "step": 49210 }, { "epoch": 4.929934391746381, "grad_norm": 2.127589225769043, "learning_rate": 2.4421235325319815e-08, "loss": 0.3811, "step": 49220 }, { "epoch": 4.930936044473381, "grad_norm": 2.0069942474365234, "learning_rate": 2.3729560283372586e-08, "loss": 0.4151, "step": 49230 }, { "epoch": 4.9319376972003806, "grad_norm": 1.6562203168869019, "learning_rate": 2.3047816856358595e-08, "loss": 0.3697, "step": 49240 }, { "epoch": 4.932939349927381, "grad_norm": 2.022047996520996, "learning_rate": 2.2376005315369318e-08, "loss": 0.4432, "step": 49250 }, { "epoch": 4.93394100265438, "grad_norm": 2.255925416946411, "learning_rate": 2.1714125927543848e-08, "loss": 0.3806, "step": 49260 }, { "epoch": 4.934942655381379, "grad_norm": 1.9878329038619995, "learning_rate": 2.1062178956071655e-08, "loss": 0.3847, "step": 49270 }, { "epoch": 4.935944308108379, "grad_norm": 1.935551404953003, "learning_rate": 2.0420164660195362e-08, "loss": 0.4273, "step": 49280 }, { "epoch": 4.936945960835378, "grad_norm": 2.561936378479004, "learning_rate": 1.978808329520798e-08, "loss": 0.4051, "step": 49290 }, { "epoch": 4.937947613562378, "grad_norm": 2.0825181007385254, "learning_rate": 1.916593511245013e-08, "loss": 0.4425, "step": 49300 }, { "epoch": 4.938949266289377, "grad_norm": 1.867803692817688, "learning_rate": 1.8553720359315574e-08, "loss": 0.442, "step": 49310 }, { "epoch": 4.939950919016377, "grad_norm": 2.4296388626098633, "learning_rate": 1.7951439279245697e-08, "loss": 0.4022, "step": 49320 }, { "epoch": 4.940952571743376, "grad_norm": 1.7822569608688354, "learning_rate": 1.7359092111732255e-08, "loss": 0.42, "step": 49330 }, { "epoch": 4.9419542244703765, "grad_norm": 2.034141778945923, "learning_rate": 1.6776679092320168e-08, "loss": 0.4095, "step": 49340 }, { "epoch": 4.942955877197376, "grad_norm": 2.480480194091797, "learning_rate": 1.6204200452596407e-08, "loss": 0.4123, "step": 49350 }, { "epoch": 4.943957529924376, "grad_norm": 1.9543697834014893, "learning_rate": 1.564165642020665e-08, "loss": 0.4178, "step": 49360 }, { "epoch": 4.944959182651375, "grad_norm": 2.275780439376831, "learning_rate": 1.5089047218838637e-08, "loss": 0.4003, "step": 49370 }, { "epoch": 4.945960835378374, "grad_norm": 2.82968807220459, "learning_rate": 1.454637306823603e-08, "loss": 0.4349, "step": 49380 }, { "epoch": 4.946962488105374, "grad_norm": 1.861443042755127, "learning_rate": 1.4013634184190105e-08, "loss": 0.3798, "step": 49390 }, { "epoch": 4.947964140832373, "grad_norm": 2.3817360401153564, "learning_rate": 1.3490830778534192e-08, "loss": 0.3854, "step": 49400 }, { "epoch": 4.948965793559373, "grad_norm": 2.194943904876709, "learning_rate": 1.2977963059163101e-08, "loss": 0.422, "step": 49410 }, { "epoch": 4.949967446286372, "grad_norm": 2.164625644683838, "learning_rate": 1.24750312300137e-08, "loss": 0.399, "step": 49420 }, { "epoch": 4.950969099013372, "grad_norm": 2.4973015785217285, "learning_rate": 1.198203549106769e-08, "loss": 0.4427, "step": 49430 }, { "epoch": 4.9519707517403715, "grad_norm": 1.9972591400146484, "learning_rate": 1.1498976038368248e-08, "loss": 0.3763, "step": 49440 }, { "epoch": 4.9529724044673715, "grad_norm": 2.178032875061035, "learning_rate": 1.1025853063992287e-08, "loss": 0.4024, "step": 49450 }, { "epoch": 4.953974057194371, "grad_norm": 1.7938523292541504, "learning_rate": 1.0562666756080974e-08, "loss": 0.3719, "step": 49460 }, { "epoch": 4.954975709921371, "grad_norm": 1.7838163375854492, "learning_rate": 1.0109417298811985e-08, "loss": 0.3619, "step": 49470 }, { "epoch": 4.95597736264837, "grad_norm": 1.8792906999588013, "learning_rate": 9.666104872416148e-09, "loss": 0.4221, "step": 49480 }, { "epoch": 4.956979015375369, "grad_norm": 1.9721959829330444, "learning_rate": 9.232729653177452e-09, "loss": 0.3949, "step": 49490 }, { "epoch": 4.957980668102369, "grad_norm": 2.2299644947052, "learning_rate": 8.809291813419163e-09, "loss": 0.4221, "step": 49500 }, { "epoch": 4.958982320829368, "grad_norm": 2.5148696899414062, "learning_rate": 8.39579152152048e-09, "loss": 0.4239, "step": 49510 }, { "epoch": 4.959983973556368, "grad_norm": 2.1798460483551025, "learning_rate": 7.992228941905433e-09, "loss": 0.427, "step": 49520 }, { "epoch": 4.960985626283367, "grad_norm": 2.080162525177002, "learning_rate": 7.598604235048434e-09, "loss": 0.4076, "step": 49530 }, { "epoch": 4.961987279010367, "grad_norm": 1.9533112049102783, "learning_rate": 7.214917557471501e-09, "loss": 0.3875, "step": 49540 }, { "epoch": 4.9629889317373665, "grad_norm": 2.1065080165863037, "learning_rate": 6.841169061744257e-09, "loss": 0.4761, "step": 49550 }, { "epoch": 4.963990584464367, "grad_norm": 2.6746320724487305, "learning_rate": 6.477358896483932e-09, "loss": 0.3773, "step": 49560 }, { "epoch": 4.964992237191366, "grad_norm": 1.8172719478607178, "learning_rate": 6.1234872063553604e-09, "loss": 0.3882, "step": 49570 }, { "epoch": 4.965993889918366, "grad_norm": 2.7890677452087402, "learning_rate": 5.779554132076537e-09, "loss": 0.3805, "step": 49580 }, { "epoch": 4.966995542645365, "grad_norm": 2.2248497009277344, "learning_rate": 5.445559810407508e-09, "loss": 0.4279, "step": 49590 }, { "epoch": 4.967997195372364, "grad_norm": 2.016270399093628, "learning_rate": 5.1215043741587034e-09, "loss": 0.3731, "step": 49600 }, { "epoch": 4.968998848099364, "grad_norm": 2.032270669937134, "learning_rate": 4.8073879521909335e-09, "loss": 0.4135, "step": 49610 }, { "epoch": 4.970000500826363, "grad_norm": 2.85500431060791, "learning_rate": 4.50321066940429e-09, "loss": 0.4544, "step": 49620 }, { "epoch": 4.971002153553363, "grad_norm": 1.6594706773757935, "learning_rate": 4.2089726467547945e-09, "loss": 0.4161, "step": 49630 }, { "epoch": 4.972003806280362, "grad_norm": 2.2570676803588867, "learning_rate": 3.9246740012488515e-09, "loss": 0.4116, "step": 49640 }, { "epoch": 4.9730054590073625, "grad_norm": 2.1982297897338867, "learning_rate": 3.6503148459265944e-09, "loss": 0.394, "step": 49650 }, { "epoch": 4.974007111734362, "grad_norm": 2.184145450592041, "learning_rate": 3.3858952898924156e-09, "loss": 0.4166, "step": 49660 }, { "epoch": 4.975008764461361, "grad_norm": 1.760334849357605, "learning_rate": 3.1314154382872108e-09, "loss": 0.4014, "step": 49670 }, { "epoch": 4.976010417188361, "grad_norm": 2.06929612159729, "learning_rate": 2.8868753923022573e-09, "loss": 0.4085, "step": 49680 }, { "epoch": 4.977012069915361, "grad_norm": 2.60774827003479, "learning_rate": 2.6522752491792147e-09, "loss": 0.3767, "step": 49690 }, { "epoch": 4.97801372264236, "grad_norm": 2.44492769241333, "learning_rate": 2.4276151022045724e-09, "loss": 0.4283, "step": 49700 }, { "epoch": 4.979015375369359, "grad_norm": 2.499879837036133, "learning_rate": 2.212895040712426e-09, "loss": 0.4121, "step": 49710 }, { "epoch": 4.980017028096359, "grad_norm": 1.796566367149353, "learning_rate": 2.008115150081702e-09, "loss": 0.3966, "step": 49720 }, { "epoch": 4.981018680823358, "grad_norm": 2.4814465045928955, "learning_rate": 1.8132755117444832e-09, "loss": 0.3729, "step": 49730 }, { "epoch": 4.982020333550358, "grad_norm": 2.5805459022521973, "learning_rate": 1.628376203177684e-09, "loss": 0.3886, "step": 49740 }, { "epoch": 4.9830219862773575, "grad_norm": 2.3878934383392334, "learning_rate": 1.453417297903048e-09, "loss": 0.42, "step": 49750 }, { "epoch": 4.9840236390043575, "grad_norm": 1.974474549293518, "learning_rate": 1.2883988654927015e-09, "loss": 0.4083, "step": 49760 }, { "epoch": 4.985025291731357, "grad_norm": 2.5575461387634277, "learning_rate": 1.1333209715636006e-09, "loss": 0.4061, "step": 49770 }, { "epoch": 4.986026944458356, "grad_norm": 1.7831733226776123, "learning_rate": 9.881836777830832e-10, "loss": 0.4559, "step": 49780 }, { "epoch": 4.987028597185356, "grad_norm": 2.3388864994049072, "learning_rate": 8.529870418633179e-10, "loss": 0.4285, "step": 49790 }, { "epoch": 4.988030249912356, "grad_norm": 2.1213908195495605, "learning_rate": 7.277311175640789e-10, "loss": 0.4158, "step": 49800 }, { "epoch": 4.989031902639355, "grad_norm": 2.5922374725341797, "learning_rate": 6.124159546899711e-10, "loss": 0.4285, "step": 49810 }, { "epoch": 4.990033555366354, "grad_norm": 1.7242733240127563, "learning_rate": 5.070415990987565e-10, "loss": 0.421, "step": 49820 }, { "epoch": 4.991035208093354, "grad_norm": 2.315945863723755, "learning_rate": 4.1160809269025213e-10, "loss": 0.4358, "step": 49830 }, { "epoch": 4.992036860820353, "grad_norm": 2.1397175788879395, "learning_rate": 3.261154734146565e-10, "loss": 0.4419, "step": 49840 }, { "epoch": 4.993038513547353, "grad_norm": 1.9984654188156128, "learning_rate": 2.505637752642231e-10, "loss": 0.4377, "step": 49850 }, { "epoch": 4.9940401662743525, "grad_norm": 1.9078891277313232, "learning_rate": 1.84953028281587e-10, "loss": 0.4617, "step": 49860 }, { "epoch": 4.995041819001353, "grad_norm": 2.45648455619812, "learning_rate": 1.2928325855976475e-10, "loss": 0.4741, "step": 49870 }, { "epoch": 4.996043471728352, "grad_norm": 1.3880661725997925, "learning_rate": 8.355448823105238e-11, "loss": 0.3786, "step": 49880 }, { "epoch": 4.997045124455351, "grad_norm": 1.9489227533340454, "learning_rate": 4.77667354836786e-11, "loss": 0.3738, "step": 49890 }, { "epoch": 4.998046777182351, "grad_norm": 2.0824508666992188, "learning_rate": 2.192001454515147e-11, "loss": 0.4642, "step": 49900 }, { "epoch": 4.99904842990935, "grad_norm": 2.111239433288574, "learning_rate": 6.014335693360629e-12, "loss": 0.3691, "step": 49910 }, { "epoch": 5.0, "grad_norm": 1.7227387428283691, "learning_rate": 4.970525657732594e-14, "loss": 0.4032, "step": 49920 }, { "epoch": 5.001001652726999, "grad_norm": 1.9057866334915161, "learning_rate": 9.429981463315144e-06, "loss": 0.3777, "step": 49930 }, { "epoch": 5.002003305453999, "grad_norm": 2.094399929046631, "learning_rate": 9.421178086170654e-06, "loss": 0.3618, "step": 49940 }, { "epoch": 5.003004958180998, "grad_norm": 2.0568583011627197, "learning_rate": 9.412377866013538e-06, "loss": 0.3926, "step": 49950 }, { "epoch": 5.004006610907998, "grad_norm": 2.5123448371887207, "learning_rate": 9.403580804627127e-06, "loss": 0.4442, "step": 49960 }, { "epoch": 5.0050082636349975, "grad_norm": 2.6346914768218994, "learning_rate": 9.394786903794133e-06, "loss": 0.4134, "step": 49970 }, { "epoch": 5.0060099163619975, "grad_norm": 2.1011178493499756, "learning_rate": 9.385996165296584e-06, "loss": 0.3734, "step": 49980 }, { "epoch": 5.007011569088997, "grad_norm": 2.0437891483306885, "learning_rate": 9.377208590915892e-06, "loss": 0.3885, "step": 49990 }, { "epoch": 5.008013221815997, "grad_norm": 2.111875295639038, "learning_rate": 9.368424182432825e-06, "loss": 0.4173, "step": 50000 }, { "epoch": 5.008013221815997, "eval_bleu": 0.4009660835826073, "eval_loss": 0.508514940738678, "eval_rouge1": 0.715396568133781, "eval_rouge2": 0.550282125226329, "eval_rougeL": 0.6740186461553936, "eval_runtime": 77959.3622, "eval_samples_per_second": 0.228, "eval_steps_per_second": 0.028, "eval_wer": 0.6738413384611066, "step": 50000 }, { "epoch": 5.009014874542996, "grad_norm": 2.5804548263549805, "learning_rate": 9.359642941627524e-06, "loss": 0.4246, "step": 50010 }, { "epoch": 5.010016527269996, "grad_norm": 2.5093562602996826, "learning_rate": 9.350864870279457e-06, "loss": 0.3901, "step": 50020 }, { "epoch": 5.011018179996995, "grad_norm": 2.047625780105591, "learning_rate": 9.342089970167458e-06, "loss": 0.3293, "step": 50030 }, { "epoch": 5.012019832723994, "grad_norm": 2.4648852348327637, "learning_rate": 9.33331824306975e-06, "loss": 0.47, "step": 50040 }, { "epoch": 5.013021485450994, "grad_norm": 2.168367862701416, "learning_rate": 9.324549690763887e-06, "loss": 0.4254, "step": 50050 }, { "epoch": 5.014023138177993, "grad_norm": 2.1796538829803467, "learning_rate": 9.31578431502676e-06, "loss": 0.4144, "step": 50060 }, { "epoch": 5.015024790904993, "grad_norm": 2.1317245960235596, "learning_rate": 9.307022117634646e-06, "loss": 0.4444, "step": 50070 }, { "epoch": 5.016026443631993, "grad_norm": 2.2961223125457764, "learning_rate": 9.298263100363188e-06, "loss": 0.4129, "step": 50080 }, { "epoch": 5.017028096358993, "grad_norm": 2.3871800899505615, "learning_rate": 9.289507264987348e-06, "loss": 0.3687, "step": 50090 }, { "epoch": 5.018029749085992, "grad_norm": 2.189589500427246, "learning_rate": 9.280754613281456e-06, "loss": 0.3685, "step": 50100 }, { "epoch": 5.019031401812992, "grad_norm": 2.174671173095703, "learning_rate": 9.272005147019225e-06, "loss": 0.388, "step": 50110 }, { "epoch": 5.020033054539991, "grad_norm": 1.7764248847961426, "learning_rate": 9.263258867973696e-06, "loss": 0.4141, "step": 50120 }, { "epoch": 5.021034707266991, "grad_norm": 2.3782589435577393, "learning_rate": 9.254515777917253e-06, "loss": 0.4332, "step": 50130 }, { "epoch": 5.02203635999399, "grad_norm": 1.9943517446517944, "learning_rate": 9.245775878621649e-06, "loss": 0.4191, "step": 50140 }, { "epoch": 5.023038012720989, "grad_norm": 2.292137384414673, "learning_rate": 9.237039171858006e-06, "loss": 0.3959, "step": 50150 }, { "epoch": 5.024039665447989, "grad_norm": 2.6247379779815674, "learning_rate": 9.228305659396785e-06, "loss": 0.3956, "step": 50160 }, { "epoch": 5.0250413181749884, "grad_norm": 1.960654854774475, "learning_rate": 9.219575343007771e-06, "loss": 0.3702, "step": 50170 }, { "epoch": 5.0260429709019885, "grad_norm": 1.9130282402038574, "learning_rate": 9.210848224460158e-06, "loss": 0.3923, "step": 50180 }, { "epoch": 5.027044623628988, "grad_norm": 2.1862452030181885, "learning_rate": 9.202124305522462e-06, "loss": 0.3833, "step": 50190 }, { "epoch": 5.028046276355988, "grad_norm": 1.7818164825439453, "learning_rate": 9.193403587962527e-06, "loss": 0.3926, "step": 50200 }, { "epoch": 5.029047929082987, "grad_norm": 2.3955230712890625, "learning_rate": 9.184686073547576e-06, "loss": 0.4152, "step": 50210 }, { "epoch": 5.030049581809987, "grad_norm": 2.290144681930542, "learning_rate": 9.175971764044202e-06, "loss": 0.4072, "step": 50220 }, { "epoch": 5.031051234536986, "grad_norm": 1.9459701776504517, "learning_rate": 9.167260661218322e-06, "loss": 0.413, "step": 50230 }, { "epoch": 5.032052887263985, "grad_norm": 1.623977541923523, "learning_rate": 9.158552766835176e-06, "loss": 0.3658, "step": 50240 }, { "epoch": 5.033054539990985, "grad_norm": 2.4459660053253174, "learning_rate": 9.149848082659417e-06, "loss": 0.4204, "step": 50250 }, { "epoch": 5.034056192717984, "grad_norm": 1.9712905883789062, "learning_rate": 9.141146610455006e-06, "loss": 0.4554, "step": 50260 }, { "epoch": 5.035057845444984, "grad_norm": 2.139141798019409, "learning_rate": 9.13244835198526e-06, "loss": 0.3885, "step": 50270 }, { "epoch": 5.0360594981719835, "grad_norm": 2.5080270767211914, "learning_rate": 9.123753309012848e-06, "loss": 0.4311, "step": 50280 }, { "epoch": 5.0370611508989835, "grad_norm": 1.8112452030181885, "learning_rate": 9.115061483299786e-06, "loss": 0.3111, "step": 50290 }, { "epoch": 5.038062803625983, "grad_norm": 2.099536180496216, "learning_rate": 9.10637287660745e-06, "loss": 0.3742, "step": 50300 }, { "epoch": 5.039064456352983, "grad_norm": 2.1379730701446533, "learning_rate": 9.097687490696522e-06, "loss": 0.3914, "step": 50310 }, { "epoch": 5.040066109079982, "grad_norm": 2.5523993968963623, "learning_rate": 9.089005327327088e-06, "loss": 0.4206, "step": 50320 }, { "epoch": 5.041067761806982, "grad_norm": 1.9698617458343506, "learning_rate": 9.08032638825855e-06, "loss": 0.3978, "step": 50330 }, { "epoch": 5.042069414533981, "grad_norm": 2.262073516845703, "learning_rate": 9.071650675249658e-06, "loss": 0.4593, "step": 50340 }, { "epoch": 5.04307106726098, "grad_norm": 2.4841954708099365, "learning_rate": 9.06297819005851e-06, "loss": 0.3381, "step": 50350 }, { "epoch": 5.04407271998798, "grad_norm": 2.5123629570007324, "learning_rate": 9.054308934442554e-06, "loss": 0.4633, "step": 50360 }, { "epoch": 5.045074372714979, "grad_norm": 2.472378969192505, "learning_rate": 9.045642910158581e-06, "loss": 0.3572, "step": 50370 }, { "epoch": 5.046076025441979, "grad_norm": 2.2146644592285156, "learning_rate": 9.036980118962723e-06, "loss": 0.4085, "step": 50380 }, { "epoch": 5.047077678168979, "grad_norm": 2.2886135578155518, "learning_rate": 9.028320562610465e-06, "loss": 0.4098, "step": 50390 }, { "epoch": 5.048079330895979, "grad_norm": 2.3172385692596436, "learning_rate": 9.019664242856632e-06, "loss": 0.401, "step": 50400 }, { "epoch": 5.049080983622978, "grad_norm": 2.2763006687164307, "learning_rate": 9.01101116145539e-06, "loss": 0.4277, "step": 50410 }, { "epoch": 5.050082636349978, "grad_norm": 2.075080633163452, "learning_rate": 9.002361320160255e-06, "loss": 0.4076, "step": 50420 }, { "epoch": 5.051084289076977, "grad_norm": 2.275829315185547, "learning_rate": 8.993714720724084e-06, "loss": 0.4366, "step": 50430 }, { "epoch": 5.052085941803977, "grad_norm": 2.0190999507904053, "learning_rate": 8.985071364899072e-06, "loss": 0.4001, "step": 50440 }, { "epoch": 5.053087594530976, "grad_norm": 1.6728298664093018, "learning_rate": 8.976431254436769e-06, "loss": 0.3575, "step": 50450 }, { "epoch": 5.054089247257975, "grad_norm": 2.0061964988708496, "learning_rate": 8.967794391088052e-06, "loss": 0.3538, "step": 50460 }, { "epoch": 5.055090899984975, "grad_norm": 2.0933334827423096, "learning_rate": 8.959160776603152e-06, "loss": 0.3712, "step": 50470 }, { "epoch": 5.056092552711974, "grad_norm": 2.1422982215881348, "learning_rate": 8.950530412731634e-06, "loss": 0.3951, "step": 50480 }, { "epoch": 5.0570942054389745, "grad_norm": 2.5245361328125, "learning_rate": 8.941903301222412e-06, "loss": 0.3951, "step": 50490 }, { "epoch": 5.058095858165974, "grad_norm": 1.8500747680664062, "learning_rate": 8.933279443823733e-06, "loss": 0.3443, "step": 50500 }, { "epoch": 5.059097510892974, "grad_norm": 1.7166526317596436, "learning_rate": 8.92465884228319e-06, "loss": 0.4024, "step": 50510 }, { "epoch": 5.060099163619973, "grad_norm": 1.9748985767364502, "learning_rate": 8.916041498347712e-06, "loss": 0.3806, "step": 50520 }, { "epoch": 5.061100816346973, "grad_norm": 2.0966179370880127, "learning_rate": 8.907427413763573e-06, "loss": 0.3958, "step": 50530 }, { "epoch": 5.062102469073972, "grad_norm": 3.496039867401123, "learning_rate": 8.898816590276379e-06, "loss": 0.3929, "step": 50540 }, { "epoch": 5.063104121800972, "grad_norm": 1.860559105873108, "learning_rate": 8.890209029631086e-06, "loss": 0.3346, "step": 50550 }, { "epoch": 5.064105774527971, "grad_norm": 2.681044340133667, "learning_rate": 8.881604733571977e-06, "loss": 0.3904, "step": 50560 }, { "epoch": 5.06510742725497, "grad_norm": 1.6064103841781616, "learning_rate": 8.873003703842681e-06, "loss": 0.4072, "step": 50570 }, { "epoch": 5.06610907998197, "grad_norm": 2.7219972610473633, "learning_rate": 8.864405942186163e-06, "loss": 0.4058, "step": 50580 }, { "epoch": 5.0671107327089695, "grad_norm": 2.393876314163208, "learning_rate": 8.855811450344729e-06, "loss": 0.455, "step": 50590 }, { "epoch": 5.0681123854359695, "grad_norm": 2.1746304035186768, "learning_rate": 8.847220230060014e-06, "loss": 0.4109, "step": 50600 }, { "epoch": 5.069114038162969, "grad_norm": 1.8715713024139404, "learning_rate": 8.838632283072998e-06, "loss": 0.346, "step": 50610 }, { "epoch": 5.070115690889969, "grad_norm": 2.0678677558898926, "learning_rate": 8.830047611123992e-06, "loss": 0.3984, "step": 50620 }, { "epoch": 5.071117343616968, "grad_norm": 1.8897209167480469, "learning_rate": 8.821466215952651e-06, "loss": 0.3794, "step": 50630 }, { "epoch": 5.072118996343968, "grad_norm": 3.4548091888427734, "learning_rate": 8.81288809929796e-06, "loss": 0.3808, "step": 50640 }, { "epoch": 5.073120649070967, "grad_norm": 1.9121617078781128, "learning_rate": 8.804313262898234e-06, "loss": 0.4143, "step": 50650 }, { "epoch": 5.074122301797967, "grad_norm": 2.0208704471588135, "learning_rate": 8.795741708491139e-06, "loss": 0.3962, "step": 50660 }, { "epoch": 5.075123954524966, "grad_norm": 2.1051366329193115, "learning_rate": 8.787173437813664e-06, "loss": 0.3973, "step": 50670 }, { "epoch": 5.076125607251965, "grad_norm": 2.504681348800659, "learning_rate": 8.778608452602136e-06, "loss": 0.406, "step": 50680 }, { "epoch": 5.077127259978965, "grad_norm": 2.3707287311553955, "learning_rate": 8.770046754592211e-06, "loss": 0.4546, "step": 50690 }, { "epoch": 5.078128912705965, "grad_norm": 2.108637809753418, "learning_rate": 8.761488345518893e-06, "loss": 0.3693, "step": 50700 }, { "epoch": 5.079130565432965, "grad_norm": 2.4608542919158936, "learning_rate": 8.752933227116503e-06, "loss": 0.4373, "step": 50710 }, { "epoch": 5.080132218159964, "grad_norm": 2.6883091926574707, "learning_rate": 8.7443814011187e-06, "loss": 0.4489, "step": 50720 }, { "epoch": 5.081133870886964, "grad_norm": 1.7556604146957397, "learning_rate": 8.735832869258486e-06, "loss": 0.3975, "step": 50730 }, { "epoch": 5.082135523613963, "grad_norm": 2.1072566509246826, "learning_rate": 8.727287633268182e-06, "loss": 0.4055, "step": 50740 }, { "epoch": 5.083137176340963, "grad_norm": 2.7997405529022217, "learning_rate": 8.718745694879451e-06, "loss": 0.4077, "step": 50750 }, { "epoch": 5.084138829067962, "grad_norm": 2.5607924461364746, "learning_rate": 8.710207055823272e-06, "loss": 0.4069, "step": 50760 }, { "epoch": 5.085140481794961, "grad_norm": 2.0641298294067383, "learning_rate": 8.701671717829993e-06, "loss": 0.371, "step": 50770 }, { "epoch": 5.086142134521961, "grad_norm": 2.205183982849121, "learning_rate": 8.69313968262924e-06, "loss": 0.4074, "step": 50780 }, { "epoch": 5.08714378724896, "grad_norm": 2.3521621227264404, "learning_rate": 8.684610951950006e-06, "loss": 0.3741, "step": 50790 }, { "epoch": 5.0881454399759605, "grad_norm": 2.01767635345459, "learning_rate": 8.676085527520605e-06, "loss": 0.4264, "step": 50800 }, { "epoch": 5.08914709270296, "grad_norm": 2.336679458618164, "learning_rate": 8.66756341106868e-06, "loss": 0.4016, "step": 50810 }, { "epoch": 5.09014874542996, "grad_norm": 2.2592053413391113, "learning_rate": 8.659044604321206e-06, "loss": 0.4801, "step": 50820 }, { "epoch": 5.091150398156959, "grad_norm": 2.573158025741577, "learning_rate": 8.65052910900448e-06, "loss": 0.4175, "step": 50830 }, { "epoch": 5.092152050883959, "grad_norm": 2.452357292175293, "learning_rate": 8.642016926844154e-06, "loss": 0.4282, "step": 50840 }, { "epoch": 5.093153703610958, "grad_norm": 1.6255464553833008, "learning_rate": 8.633508059565166e-06, "loss": 0.4136, "step": 50850 }, { "epoch": 5.094155356337958, "grad_norm": 1.7420393228530884, "learning_rate": 8.625002508891813e-06, "loss": 0.3775, "step": 50860 }, { "epoch": 5.095157009064957, "grad_norm": 2.3295178413391113, "learning_rate": 8.6165002765477e-06, "loss": 0.4032, "step": 50870 }, { "epoch": 5.096158661791956, "grad_norm": 1.9889898300170898, "learning_rate": 8.608001364255803e-06, "loss": 0.4529, "step": 50880 }, { "epoch": 5.097160314518956, "grad_norm": 2.4790945053100586, "learning_rate": 8.599505773738365e-06, "loss": 0.3701, "step": 50890 }, { "epoch": 5.0981619672459555, "grad_norm": 2.510486602783203, "learning_rate": 8.59101350671698e-06, "loss": 0.4356, "step": 50900 }, { "epoch": 5.0991636199729555, "grad_norm": 2.439235210418701, "learning_rate": 8.582524564912604e-06, "loss": 0.4114, "step": 50910 }, { "epoch": 5.100165272699955, "grad_norm": 2.2638766765594482, "learning_rate": 8.574038950045457e-06, "loss": 0.3951, "step": 50920 }, { "epoch": 5.101166925426955, "grad_norm": 2.317436933517456, "learning_rate": 8.565556663835131e-06, "loss": 0.4255, "step": 50930 }, { "epoch": 5.102168578153954, "grad_norm": 2.248082160949707, "learning_rate": 8.557077708000514e-06, "loss": 0.445, "step": 50940 }, { "epoch": 5.103170230880954, "grad_norm": 2.4589478969573975, "learning_rate": 8.54860208425986e-06, "loss": 0.3887, "step": 50950 }, { "epoch": 5.104171883607953, "grad_norm": 2.3282318115234375, "learning_rate": 8.540129794330699e-06, "loss": 0.3674, "step": 50960 }, { "epoch": 5.105173536334953, "grad_norm": 1.6831501722335815, "learning_rate": 8.5316608399299e-06, "loss": 0.4535, "step": 50970 }, { "epoch": 5.106175189061952, "grad_norm": 1.7259416580200195, "learning_rate": 8.523195222773688e-06, "loss": 0.3896, "step": 50980 }, { "epoch": 5.107176841788951, "grad_norm": 2.5421462059020996, "learning_rate": 8.514732944577583e-06, "loss": 0.3938, "step": 50990 }, { "epoch": 5.108178494515951, "grad_norm": 2.577516794204712, "learning_rate": 8.506274007056412e-06, "loss": 0.4579, "step": 51000 }, { "epoch": 5.1091801472429506, "grad_norm": 2.2617828845977783, "learning_rate": 8.497818411924363e-06, "loss": 0.4584, "step": 51010 }, { "epoch": 5.110181799969951, "grad_norm": 2.734596014022827, "learning_rate": 8.489366160894937e-06, "loss": 0.4102, "step": 51020 }, { "epoch": 5.11118345269695, "grad_norm": 2.1711714267730713, "learning_rate": 8.480917255680929e-06, "loss": 0.4107, "step": 51030 }, { "epoch": 5.11218510542395, "grad_norm": 2.1046245098114014, "learning_rate": 8.472471697994478e-06, "loss": 0.4126, "step": 51040 }, { "epoch": 5.113186758150949, "grad_norm": 2.018583059310913, "learning_rate": 8.464029489547057e-06, "loss": 0.3639, "step": 51050 }, { "epoch": 5.114188410877949, "grad_norm": 2.4939472675323486, "learning_rate": 8.455590632049451e-06, "loss": 0.391, "step": 51060 }, { "epoch": 5.115190063604948, "grad_norm": 2.2978515625, "learning_rate": 8.447155127211734e-06, "loss": 0.3951, "step": 51070 }, { "epoch": 5.116191716331948, "grad_norm": 2.4101288318634033, "learning_rate": 8.438722976743352e-06, "loss": 0.4014, "step": 51080 }, { "epoch": 5.117193369058947, "grad_norm": 1.3514302968978882, "learning_rate": 8.430294182353049e-06, "loss": 0.3879, "step": 51090 }, { "epoch": 5.118195021785946, "grad_norm": 3.136054277420044, "learning_rate": 8.421868745748873e-06, "loss": 0.4316, "step": 51100 }, { "epoch": 5.1191966745129465, "grad_norm": 2.85766339302063, "learning_rate": 8.4134466686382e-06, "loss": 0.3821, "step": 51110 }, { "epoch": 5.120198327239946, "grad_norm": 2.4241206645965576, "learning_rate": 8.405027952727754e-06, "loss": 0.5232, "step": 51120 }, { "epoch": 5.121199979966946, "grad_norm": 1.8085269927978516, "learning_rate": 8.39661259972355e-06, "loss": 0.4257, "step": 51130 }, { "epoch": 5.122201632693945, "grad_norm": 1.9889925718307495, "learning_rate": 8.388200611330902e-06, "loss": 0.4042, "step": 51140 }, { "epoch": 5.123203285420945, "grad_norm": 1.835249662399292, "learning_rate": 8.379791989254493e-06, "loss": 0.3501, "step": 51150 }, { "epoch": 5.124204938147944, "grad_norm": 2.209379196166992, "learning_rate": 8.371386735198292e-06, "loss": 0.3846, "step": 51160 }, { "epoch": 5.125206590874944, "grad_norm": 1.8162431716918945, "learning_rate": 8.36298485086559e-06, "loss": 0.4411, "step": 51170 }, { "epoch": 5.126208243601943, "grad_norm": 2.113241195678711, "learning_rate": 8.354586337958983e-06, "loss": 0.4041, "step": 51180 }, { "epoch": 5.127209896328942, "grad_norm": 1.9983274936676025, "learning_rate": 8.346191198180414e-06, "loss": 0.379, "step": 51190 }, { "epoch": 5.128211549055942, "grad_norm": 2.5187878608703613, "learning_rate": 8.337799433231126e-06, "loss": 0.4103, "step": 51200 }, { "epoch": 5.1292132017829415, "grad_norm": 2.1544573307037354, "learning_rate": 8.329411044811653e-06, "loss": 0.4325, "step": 51210 }, { "epoch": 5.1302148545099415, "grad_norm": 2.4598071575164795, "learning_rate": 8.321026034621896e-06, "loss": 0.3789, "step": 51220 }, { "epoch": 5.131216507236941, "grad_norm": 2.138986349105835, "learning_rate": 8.312644404361033e-06, "loss": 0.4073, "step": 51230 }, { "epoch": 5.132218159963941, "grad_norm": 2.2500813007354736, "learning_rate": 8.30426615572758e-06, "loss": 0.4308, "step": 51240 }, { "epoch": 5.13321981269094, "grad_norm": 2.7701327800750732, "learning_rate": 8.295891290419334e-06, "loss": 0.3836, "step": 51250 }, { "epoch": 5.13422146541794, "grad_norm": 1.6952491998672485, "learning_rate": 8.287519810133443e-06, "loss": 0.4298, "step": 51260 }, { "epoch": 5.135223118144939, "grad_norm": 1.9191524982452393, "learning_rate": 8.279151716566358e-06, "loss": 0.3448, "step": 51270 }, { "epoch": 5.136224770871939, "grad_norm": 1.756516695022583, "learning_rate": 8.270787011413833e-06, "loss": 0.4178, "step": 51280 }, { "epoch": 5.137226423598938, "grad_norm": 1.841568112373352, "learning_rate": 8.262425696370949e-06, "loss": 0.3687, "step": 51290 }, { "epoch": 5.138228076325937, "grad_norm": 2.1284594535827637, "learning_rate": 8.254067773132085e-06, "loss": 0.3736, "step": 51300 }, { "epoch": 5.139229729052937, "grad_norm": 1.7997220754623413, "learning_rate": 8.24571324339096e-06, "loss": 0.3807, "step": 51310 }, { "epoch": 5.1402313817799365, "grad_norm": 2.3964970111846924, "learning_rate": 8.237362108840555e-06, "loss": 0.4019, "step": 51320 }, { "epoch": 5.141233034506937, "grad_norm": 2.325812816619873, "learning_rate": 8.22901437117322e-06, "loss": 0.3861, "step": 51330 }, { "epoch": 5.142234687233936, "grad_norm": 2.397308826446533, "learning_rate": 8.220670032080587e-06, "loss": 0.4339, "step": 51340 }, { "epoch": 5.143236339960936, "grad_norm": 2.3338468074798584, "learning_rate": 8.212329093253605e-06, "loss": 0.4321, "step": 51350 }, { "epoch": 5.144237992687935, "grad_norm": 1.8569867610931396, "learning_rate": 8.203991556382523e-06, "loss": 0.3943, "step": 51360 }, { "epoch": 5.145239645414935, "grad_norm": 1.970132827758789, "learning_rate": 8.195657423156921e-06, "loss": 0.3841, "step": 51370 }, { "epoch": 5.146241298141934, "grad_norm": 2.166137933731079, "learning_rate": 8.187326695265671e-06, "loss": 0.4063, "step": 51380 }, { "epoch": 5.147242950868934, "grad_norm": 2.590151786804199, "learning_rate": 8.178999374396967e-06, "loss": 0.4072, "step": 51390 }, { "epoch": 5.148244603595933, "grad_norm": 2.1858596801757812, "learning_rate": 8.170675462238306e-06, "loss": 0.3971, "step": 51400 }, { "epoch": 5.149246256322932, "grad_norm": 2.6428606510162354, "learning_rate": 8.162354960476498e-06, "loss": 0.4001, "step": 51410 }, { "epoch": 5.1502479090499325, "grad_norm": 2.1872785091400146, "learning_rate": 8.154037870797657e-06, "loss": 0.3665, "step": 51420 }, { "epoch": 5.151249561776932, "grad_norm": 2.6396920680999756, "learning_rate": 8.14572419488721e-06, "loss": 0.3787, "step": 51430 }, { "epoch": 5.152251214503932, "grad_norm": 2.438502073287964, "learning_rate": 8.137413934429893e-06, "loss": 0.3889, "step": 51440 }, { "epoch": 5.153252867230931, "grad_norm": 2.2407338619232178, "learning_rate": 8.12910709110975e-06, "loss": 0.4352, "step": 51450 }, { "epoch": 5.154254519957931, "grad_norm": 1.8680403232574463, "learning_rate": 8.120803666610122e-06, "loss": 0.367, "step": 51460 }, { "epoch": 5.15525617268493, "grad_norm": 2.1846930980682373, "learning_rate": 8.112503662613672e-06, "loss": 0.3574, "step": 51470 }, { "epoch": 5.15625782541193, "grad_norm": 1.9968862533569336, "learning_rate": 8.104207080802361e-06, "loss": 0.4072, "step": 51480 }, { "epoch": 5.157259478138929, "grad_norm": 2.1309328079223633, "learning_rate": 8.09591392285746e-06, "loss": 0.4554, "step": 51490 }, { "epoch": 5.158261130865929, "grad_norm": 2.3871166706085205, "learning_rate": 8.087624190459545e-06, "loss": 0.4323, "step": 51500 }, { "epoch": 5.159262783592928, "grad_norm": 2.2008261680603027, "learning_rate": 8.079337885288496e-06, "loss": 0.4549, "step": 51510 }, { "epoch": 5.1602644363199275, "grad_norm": 2.131897211074829, "learning_rate": 8.071055009023505e-06, "loss": 0.4221, "step": 51520 }, { "epoch": 5.1612660890469275, "grad_norm": 2.741530418395996, "learning_rate": 8.062775563343056e-06, "loss": 0.4023, "step": 51530 }, { "epoch": 5.162267741773927, "grad_norm": 1.911015272140503, "learning_rate": 8.054499549924955e-06, "loss": 0.4323, "step": 51540 }, { "epoch": 5.163269394500927, "grad_norm": 2.1528337001800537, "learning_rate": 8.046226970446299e-06, "loss": 0.4609, "step": 51550 }, { "epoch": 5.164271047227926, "grad_norm": 2.0719454288482666, "learning_rate": 8.037957826583497e-06, "loss": 0.3626, "step": 51560 }, { "epoch": 5.165272699954926, "grad_norm": 2.4185614585876465, "learning_rate": 8.029692120012255e-06, "loss": 0.4371, "step": 51570 }, { "epoch": 5.166274352681925, "grad_norm": 2.706132411956787, "learning_rate": 8.021429852407592e-06, "loss": 0.4581, "step": 51580 }, { "epoch": 5.167276005408925, "grad_norm": 2.2206037044525146, "learning_rate": 8.013171025443816e-06, "loss": 0.3972, "step": 51590 }, { "epoch": 5.168277658135924, "grad_norm": 1.9738082885742188, "learning_rate": 8.004915640794553e-06, "loss": 0.3954, "step": 51600 }, { "epoch": 5.169279310862924, "grad_norm": 2.3320820331573486, "learning_rate": 7.996663700132723e-06, "loss": 0.4122, "step": 51610 }, { "epoch": 5.170280963589923, "grad_norm": 1.627105474472046, "learning_rate": 7.988415205130545e-06, "loss": 0.3838, "step": 51620 }, { "epoch": 5.1712826163169225, "grad_norm": 1.9464679956436157, "learning_rate": 7.980170157459549e-06, "loss": 0.3813, "step": 51630 }, { "epoch": 5.172284269043923, "grad_norm": 2.1501245498657227, "learning_rate": 7.971928558790562e-06, "loss": 0.4326, "step": 51640 }, { "epoch": 5.173285921770922, "grad_norm": 1.8441542387008667, "learning_rate": 7.963690410793709e-06, "loss": 0.3666, "step": 51650 }, { "epoch": 5.174287574497922, "grad_norm": 1.986935019493103, "learning_rate": 7.955455715138419e-06, "loss": 0.4267, "step": 51660 }, { "epoch": 5.175289227224921, "grad_norm": 2.072371006011963, "learning_rate": 7.94722447349342e-06, "loss": 0.4365, "step": 51670 }, { "epoch": 5.176290879951921, "grad_norm": 1.5192880630493164, "learning_rate": 7.938996687526745e-06, "loss": 0.3503, "step": 51680 }, { "epoch": 5.17729253267892, "grad_norm": 2.655179500579834, "learning_rate": 7.930772358905719e-06, "loss": 0.4375, "step": 51690 }, { "epoch": 5.17829418540592, "grad_norm": 2.249799966812134, "learning_rate": 7.92255148929697e-06, "loss": 0.3607, "step": 51700 }, { "epoch": 5.179295838132919, "grad_norm": 2.191415548324585, "learning_rate": 7.914334080366428e-06, "loss": 0.3986, "step": 51710 }, { "epoch": 5.180297490859919, "grad_norm": 2.2049078941345215, "learning_rate": 7.906120133779318e-06, "loss": 0.3843, "step": 51720 }, { "epoch": 5.1812991435869185, "grad_norm": 3.0092198848724365, "learning_rate": 7.897909651200152e-06, "loss": 0.4309, "step": 51730 }, { "epoch": 5.182300796313918, "grad_norm": 2.194514751434326, "learning_rate": 7.889702634292785e-06, "loss": 0.4127, "step": 51740 }, { "epoch": 5.183302449040918, "grad_norm": 2.2221009731292725, "learning_rate": 7.881499084720301e-06, "loss": 0.3811, "step": 51750 }, { "epoch": 5.184304101767917, "grad_norm": 1.7878801822662354, "learning_rate": 7.873299004145136e-06, "loss": 0.3726, "step": 51760 }, { "epoch": 5.185305754494917, "grad_norm": 2.5555198192596436, "learning_rate": 7.86510239422899e-06, "loss": 0.4467, "step": 51770 }, { "epoch": 5.186307407221916, "grad_norm": 2.4053938388824463, "learning_rate": 7.8569092566329e-06, "loss": 0.3929, "step": 51780 }, { "epoch": 5.187309059948916, "grad_norm": 2.1048076152801514, "learning_rate": 7.84871959301715e-06, "loss": 0.3874, "step": 51790 }, { "epoch": 5.188310712675915, "grad_norm": 2.1604602336883545, "learning_rate": 7.840533405041343e-06, "loss": 0.381, "step": 51800 }, { "epoch": 5.189312365402915, "grad_norm": 2.2942380905151367, "learning_rate": 7.8323506943644e-06, "loss": 0.4038, "step": 51810 }, { "epoch": 5.190314018129914, "grad_norm": 2.568305015563965, "learning_rate": 7.824171462644493e-06, "loss": 0.4428, "step": 51820 }, { "epoch": 5.1913156708569135, "grad_norm": 2.6570141315460205, "learning_rate": 7.81599571153912e-06, "loss": 0.4239, "step": 51830 }, { "epoch": 5.1923173235839135, "grad_norm": 2.121030807495117, "learning_rate": 7.807823442705056e-06, "loss": 0.4091, "step": 51840 }, { "epoch": 5.193318976310913, "grad_norm": 2.315417528152466, "learning_rate": 7.799654657798402e-06, "loss": 0.39, "step": 51850 }, { "epoch": 5.194320629037913, "grad_norm": 2.200780153274536, "learning_rate": 7.79148935847451e-06, "loss": 0.3821, "step": 51860 }, { "epoch": 5.195322281764912, "grad_norm": 2.2050414085388184, "learning_rate": 7.783327546388045e-06, "loss": 0.414, "step": 51870 }, { "epoch": 5.196323934491912, "grad_norm": 2.513406991958618, "learning_rate": 7.77516922319298e-06, "loss": 0.4066, "step": 51880 }, { "epoch": 5.197325587218911, "grad_norm": 2.4052371978759766, "learning_rate": 7.767014390542565e-06, "loss": 0.3507, "step": 51890 }, { "epoch": 5.198327239945911, "grad_norm": 2.228341579437256, "learning_rate": 7.758863050089337e-06, "loss": 0.4601, "step": 51900 }, { "epoch": 5.19932889267291, "grad_norm": 2.076115608215332, "learning_rate": 7.750715203485127e-06, "loss": 0.3405, "step": 51910 }, { "epoch": 5.20033054539991, "grad_norm": 2.2756433486938477, "learning_rate": 7.742570852381092e-06, "loss": 0.4388, "step": 51920 }, { "epoch": 5.201332198126909, "grad_norm": 1.7769001722335815, "learning_rate": 7.734429998427626e-06, "loss": 0.3725, "step": 51930 }, { "epoch": 5.2023338508539085, "grad_norm": 2.7252211570739746, "learning_rate": 7.726292643274441e-06, "loss": 0.4404, "step": 51940 }, { "epoch": 5.203335503580909, "grad_norm": 2.693631887435913, "learning_rate": 7.718158788570557e-06, "loss": 0.3966, "step": 51950 }, { "epoch": 5.204337156307908, "grad_norm": 1.9786999225616455, "learning_rate": 7.71002843596427e-06, "loss": 0.3878, "step": 51960 }, { "epoch": 5.205338809034908, "grad_norm": 2.3402976989746094, "learning_rate": 7.701901587103146e-06, "loss": 0.391, "step": 51970 }, { "epoch": 5.206340461761907, "grad_norm": 1.6999900341033936, "learning_rate": 7.69377824363406e-06, "loss": 0.4084, "step": 51980 }, { "epoch": 5.207342114488907, "grad_norm": 2.1598548889160156, "learning_rate": 7.685658407203192e-06, "loss": 0.3995, "step": 51990 }, { "epoch": 5.208343767215906, "grad_norm": 2.034207582473755, "learning_rate": 7.677542079455994e-06, "loss": 0.4061, "step": 52000 }, { "epoch": 5.209345419942906, "grad_norm": 2.343937635421753, "learning_rate": 7.669429262037183e-06, "loss": 0.355, "step": 52010 }, { "epoch": 5.210347072669905, "grad_norm": 2.3966968059539795, "learning_rate": 7.661319956590817e-06, "loss": 0.3928, "step": 52020 }, { "epoch": 5.211348725396905, "grad_norm": 3.0318691730499268, "learning_rate": 7.653214164760217e-06, "loss": 0.4378, "step": 52030 }, { "epoch": 5.2123503781239044, "grad_norm": 2.464893341064453, "learning_rate": 7.645111888187967e-06, "loss": 0.3915, "step": 52040 }, { "epoch": 5.213352030850904, "grad_norm": 1.6739379167556763, "learning_rate": 7.637013128515966e-06, "loss": 0.3769, "step": 52050 }, { "epoch": 5.214353683577904, "grad_norm": 1.666397213935852, "learning_rate": 7.6289178873854086e-06, "loss": 0.427, "step": 52060 }, { "epoch": 5.215355336304903, "grad_norm": 2.2799434661865234, "learning_rate": 7.62082616643677e-06, "loss": 0.3725, "step": 52070 }, { "epoch": 5.216356989031903, "grad_norm": 2.4019501209259033, "learning_rate": 7.612737967309777e-06, "loss": 0.3981, "step": 52080 }, { "epoch": 5.217358641758902, "grad_norm": 2.1799919605255127, "learning_rate": 7.604653291643496e-06, "loss": 0.4186, "step": 52090 }, { "epoch": 5.218360294485902, "grad_norm": 2.0130274295806885, "learning_rate": 7.59657214107625e-06, "loss": 0.4132, "step": 52100 }, { "epoch": 5.219361947212901, "grad_norm": 1.9630812406539917, "learning_rate": 7.588494517245656e-06, "loss": 0.351, "step": 52110 }, { "epoch": 5.220363599939901, "grad_norm": 2.2186408042907715, "learning_rate": 7.5804204217885925e-06, "loss": 0.4274, "step": 52120 }, { "epoch": 5.2213652526669, "grad_norm": 1.6884628534317017, "learning_rate": 7.572349856341265e-06, "loss": 0.3911, "step": 52130 }, { "epoch": 5.2223669053939, "grad_norm": 2.2494137287139893, "learning_rate": 7.564282822539143e-06, "loss": 0.4608, "step": 52140 }, { "epoch": 5.2233685581208995, "grad_norm": 2.3293933868408203, "learning_rate": 7.556219322016958e-06, "loss": 0.4025, "step": 52150 }, { "epoch": 5.224370210847899, "grad_norm": 2.4375548362731934, "learning_rate": 7.548159356408766e-06, "loss": 0.4536, "step": 52160 }, { "epoch": 5.225371863574899, "grad_norm": 1.501153826713562, "learning_rate": 7.540102927347883e-06, "loss": 0.393, "step": 52170 }, { "epoch": 5.226373516301898, "grad_norm": 2.5104174613952637, "learning_rate": 7.53205003646692e-06, "loss": 0.4326, "step": 52180 }, { "epoch": 5.227375169028898, "grad_norm": 2.1039373874664307, "learning_rate": 7.524000685397739e-06, "loss": 0.4187, "step": 52190 }, { "epoch": 5.228376821755897, "grad_norm": 2.2166614532470703, "learning_rate": 7.515954875771533e-06, "loss": 0.4832, "step": 52200 }, { "epoch": 5.229378474482897, "grad_norm": 2.8615593910217285, "learning_rate": 7.507912609218759e-06, "loss": 0.3768, "step": 52210 }, { "epoch": 5.230380127209896, "grad_norm": 2.372994899749756, "learning_rate": 7.499873887369119e-06, "loss": 0.4151, "step": 52220 }, { "epoch": 5.231381779936896, "grad_norm": 2.192148447036743, "learning_rate": 7.491838711851659e-06, "loss": 0.4568, "step": 52230 }, { "epoch": 5.232383432663895, "grad_norm": 2.341484785079956, "learning_rate": 7.483807084294664e-06, "loss": 0.3927, "step": 52240 }, { "epoch": 5.2333850853908945, "grad_norm": 1.8965469598770142, "learning_rate": 7.475779006325723e-06, "loss": 0.4777, "step": 52250 }, { "epoch": 5.234386738117895, "grad_norm": 2.039476156234741, "learning_rate": 7.467754479571667e-06, "loss": 0.4218, "step": 52260 }, { "epoch": 5.235388390844894, "grad_norm": 1.3191801309585571, "learning_rate": 7.459733505658661e-06, "loss": 0.3756, "step": 52270 }, { "epoch": 5.236390043571894, "grad_norm": 2.1452765464782715, "learning_rate": 7.4517160862121185e-06, "loss": 0.4151, "step": 52280 }, { "epoch": 5.237391696298893, "grad_norm": 1.9325929880142212, "learning_rate": 7.443702222856735e-06, "loss": 0.3947, "step": 52290 }, { "epoch": 5.238393349025893, "grad_norm": 2.707630157470703, "learning_rate": 7.435691917216489e-06, "loss": 0.4178, "step": 52300 }, { "epoch": 5.239395001752892, "grad_norm": 2.664762020111084, "learning_rate": 7.427685170914636e-06, "loss": 0.3769, "step": 52310 }, { "epoch": 5.240396654479892, "grad_norm": 1.8347071409225464, "learning_rate": 7.4196819855737255e-06, "loss": 0.4197, "step": 52320 }, { "epoch": 5.241398307206891, "grad_norm": 1.889101266860962, "learning_rate": 7.411682362815542e-06, "loss": 0.411, "step": 52330 }, { "epoch": 5.242399959933891, "grad_norm": 3.006286144256592, "learning_rate": 7.403686304261204e-06, "loss": 0.4188, "step": 52340 }, { "epoch": 5.24340161266089, "grad_norm": 2.348801851272583, "learning_rate": 7.3956938115310734e-06, "loss": 0.4278, "step": 52350 }, { "epoch": 5.24440326538789, "grad_norm": 2.0983736515045166, "learning_rate": 7.387704886244798e-06, "loss": 0.4544, "step": 52360 }, { "epoch": 5.24540491811489, "grad_norm": 3.7822070121765137, "learning_rate": 7.3797195300213005e-06, "loss": 0.4355, "step": 52370 }, { "epoch": 5.246406570841889, "grad_norm": 2.2507216930389404, "learning_rate": 7.371737744478785e-06, "loss": 0.4098, "step": 52380 }, { "epoch": 5.247408223568889, "grad_norm": 2.440915107727051, "learning_rate": 7.363759531234729e-06, "loss": 0.3955, "step": 52390 }, { "epoch": 5.248409876295888, "grad_norm": 1.8399525880813599, "learning_rate": 7.355784891905882e-06, "loss": 0.3885, "step": 52400 }, { "epoch": 5.249411529022888, "grad_norm": 2.2914631366729736, "learning_rate": 7.347813828108277e-06, "loss": 0.411, "step": 52410 }, { "epoch": 5.250413181749887, "grad_norm": 2.0582337379455566, "learning_rate": 7.339846341457221e-06, "loss": 0.3832, "step": 52420 }, { "epoch": 5.251414834476887, "grad_norm": 1.9887348413467407, "learning_rate": 7.331882433567289e-06, "loss": 0.4259, "step": 52430 }, { "epoch": 5.252416487203886, "grad_norm": 2.355299472808838, "learning_rate": 7.323922106052339e-06, "loss": 0.3626, "step": 52440 }, { "epoch": 5.253418139930886, "grad_norm": 2.0583131313323975, "learning_rate": 7.315965360525498e-06, "loss": 0.4364, "step": 52450 }, { "epoch": 5.2544197926578855, "grad_norm": 2.732943296432495, "learning_rate": 7.308012198599173e-06, "loss": 0.4869, "step": 52460 }, { "epoch": 5.255421445384885, "grad_norm": 2.507206439971924, "learning_rate": 7.300062621885037e-06, "loss": 0.4206, "step": 52470 }, { "epoch": 5.256423098111885, "grad_norm": 2.7191555500030518, "learning_rate": 7.292116631994045e-06, "loss": 0.411, "step": 52480 }, { "epoch": 5.257424750838884, "grad_norm": 1.7782527208328247, "learning_rate": 7.284174230536417e-06, "loss": 0.3589, "step": 52490 }, { "epoch": 5.258426403565884, "grad_norm": 2.6875455379486084, "learning_rate": 7.276235419121649e-06, "loss": 0.3732, "step": 52500 }, { "epoch": 5.259428056292883, "grad_norm": 2.146796464920044, "learning_rate": 7.268300199358516e-06, "loss": 0.3572, "step": 52510 }, { "epoch": 5.260429709019883, "grad_norm": 2.5086498260498047, "learning_rate": 7.260368572855053e-06, "loss": 0.4919, "step": 52520 }, { "epoch": 5.261431361746882, "grad_norm": 2.6334359645843506, "learning_rate": 7.2524405412185775e-06, "loss": 0.383, "step": 52530 }, { "epoch": 5.262433014473882, "grad_norm": 2.3560729026794434, "learning_rate": 7.244516106055671e-06, "loss": 0.4374, "step": 52540 }, { "epoch": 5.263434667200881, "grad_norm": 1.7651573419570923, "learning_rate": 7.23659526897219e-06, "loss": 0.3754, "step": 52550 }, { "epoch": 5.264436319927881, "grad_norm": 2.338216781616211, "learning_rate": 7.228678031573263e-06, "loss": 0.3835, "step": 52560 }, { "epoch": 5.265437972654881, "grad_norm": 2.0452308654785156, "learning_rate": 7.2207643954632865e-06, "loss": 0.4685, "step": 52570 }, { "epoch": 5.26643962538188, "grad_norm": 1.7141194343566895, "learning_rate": 7.212854362245924e-06, "loss": 0.3844, "step": 52580 }, { "epoch": 5.26744127810888, "grad_norm": 2.3181238174438477, "learning_rate": 7.20494793352412e-06, "loss": 0.4083, "step": 52590 }, { "epoch": 5.268442930835879, "grad_norm": 2.631800889968872, "learning_rate": 7.197045110900069e-06, "loss": 0.359, "step": 52600 }, { "epoch": 5.269444583562879, "grad_norm": 1.7754448652267456, "learning_rate": 7.189145895975272e-06, "loss": 0.3636, "step": 52610 }, { "epoch": 5.270446236289878, "grad_norm": 2.115633964538574, "learning_rate": 7.181250290350447e-06, "loss": 0.3871, "step": 52620 }, { "epoch": 5.271447889016878, "grad_norm": 2.290174961090088, "learning_rate": 7.173358295625621e-06, "loss": 0.4456, "step": 52630 }, { "epoch": 5.272449541743877, "grad_norm": 2.639821767807007, "learning_rate": 7.165469913400072e-06, "loss": 0.4184, "step": 52640 }, { "epoch": 5.273451194470877, "grad_norm": 2.0272226333618164, "learning_rate": 7.1575851452723496e-06, "loss": 0.4, "step": 52650 }, { "epoch": 5.274452847197876, "grad_norm": 2.293170213699341, "learning_rate": 7.149703992840276e-06, "loss": 0.3766, "step": 52660 }, { "epoch": 5.2754544999248765, "grad_norm": 3.4424662590026855, "learning_rate": 7.141826457700923e-06, "loss": 0.351, "step": 52670 }, { "epoch": 5.276456152651876, "grad_norm": 1.8073848485946655, "learning_rate": 7.133952541450669e-06, "loss": 0.4577, "step": 52680 }, { "epoch": 5.277457805378875, "grad_norm": 2.518890619277954, "learning_rate": 7.126082245685106e-06, "loss": 0.3885, "step": 52690 }, { "epoch": 5.278459458105875, "grad_norm": 2.2708356380462646, "learning_rate": 7.118215571999129e-06, "loss": 0.3841, "step": 52700 }, { "epoch": 5.279461110832874, "grad_norm": 1.920397162437439, "learning_rate": 7.1103525219868795e-06, "loss": 0.4228, "step": 52710 }, { "epoch": 5.280462763559874, "grad_norm": 2.008230686187744, "learning_rate": 7.102493097241797e-06, "loss": 0.4011, "step": 52720 }, { "epoch": 5.281464416286873, "grad_norm": 1.935886025428772, "learning_rate": 7.094637299356544e-06, "loss": 0.3789, "step": 52730 }, { "epoch": 5.282466069013873, "grad_norm": 2.69047212600708, "learning_rate": 7.0867851299230595e-06, "loss": 0.4479, "step": 52740 }, { "epoch": 5.283467721740872, "grad_norm": 1.8354910612106323, "learning_rate": 7.078936590532584e-06, "loss": 0.3983, "step": 52750 }, { "epoch": 5.284469374467872, "grad_norm": 2.452239990234375, "learning_rate": 7.071091682775569e-06, "loss": 0.4333, "step": 52760 }, { "epoch": 5.2854710271948715, "grad_norm": 2.979823112487793, "learning_rate": 7.063250408241761e-06, "loss": 0.427, "step": 52770 }, { "epoch": 5.2864726799218715, "grad_norm": 2.0987963676452637, "learning_rate": 7.055412768520156e-06, "loss": 0.3653, "step": 52780 }, { "epoch": 5.287474332648871, "grad_norm": 2.366989850997925, "learning_rate": 7.047578765199048e-06, "loss": 0.4078, "step": 52790 }, { "epoch": 5.28847598537587, "grad_norm": 2.4074864387512207, "learning_rate": 7.039748399865937e-06, "loss": 0.4018, "step": 52800 }, { "epoch": 5.28947763810287, "grad_norm": 2.3981125354766846, "learning_rate": 7.031921674107622e-06, "loss": 0.4609, "step": 52810 }, { "epoch": 5.290479290829869, "grad_norm": 2.6520919799804688, "learning_rate": 7.024098589510181e-06, "loss": 0.4054, "step": 52820 }, { "epoch": 5.291480943556869, "grad_norm": 2.526301145553589, "learning_rate": 7.016279147658903e-06, "loss": 0.4026, "step": 52830 }, { "epoch": 5.292482596283868, "grad_norm": 1.9764511585235596, "learning_rate": 7.008463350138381e-06, "loss": 0.406, "step": 52840 }, { "epoch": 5.293484249010868, "grad_norm": 2.1861753463745117, "learning_rate": 7.000651198532446e-06, "loss": 0.4219, "step": 52850 }, { "epoch": 5.294485901737867, "grad_norm": 1.8499925136566162, "learning_rate": 6.992842694424221e-06, "loss": 0.3846, "step": 52860 }, { "epoch": 5.295487554464867, "grad_norm": 2.1721975803375244, "learning_rate": 6.9850378393960495e-06, "loss": 0.4378, "step": 52870 }, { "epoch": 5.2964892071918666, "grad_norm": 2.3228940963745117, "learning_rate": 6.977236635029552e-06, "loss": 0.4515, "step": 52880 }, { "epoch": 5.297490859918867, "grad_norm": 2.2590081691741943, "learning_rate": 6.96943908290563e-06, "loss": 0.4124, "step": 52890 }, { "epoch": 5.298492512645866, "grad_norm": 2.0431101322174072, "learning_rate": 6.961645184604423e-06, "loss": 0.3689, "step": 52900 }, { "epoch": 5.299494165372865, "grad_norm": 1.8751521110534668, "learning_rate": 6.953854941705323e-06, "loss": 0.4161, "step": 52910 }, { "epoch": 5.300495818099865, "grad_norm": 2.33186936378479, "learning_rate": 6.946068355786992e-06, "loss": 0.4703, "step": 52920 }, { "epoch": 5.301497470826864, "grad_norm": 2.136018991470337, "learning_rate": 6.9382854284273715e-06, "loss": 0.3905, "step": 52930 }, { "epoch": 5.302499123553864, "grad_norm": 1.790964126586914, "learning_rate": 6.930506161203618e-06, "loss": 0.4172, "step": 52940 }, { "epoch": 5.303500776280863, "grad_norm": 1.893381118774414, "learning_rate": 6.922730555692172e-06, "loss": 0.4716, "step": 52950 }, { "epoch": 5.304502429007863, "grad_norm": 2.6119513511657715, "learning_rate": 6.914958613468744e-06, "loss": 0.4113, "step": 52960 }, { "epoch": 5.305504081734862, "grad_norm": 1.9697766304016113, "learning_rate": 6.907190336108288e-06, "loss": 0.419, "step": 52970 }, { "epoch": 5.3065057344618625, "grad_norm": 2.429619312286377, "learning_rate": 6.899425725184999e-06, "loss": 0.3799, "step": 52980 }, { "epoch": 5.307507387188862, "grad_norm": 2.6430892944335938, "learning_rate": 6.891664782272347e-06, "loss": 0.3845, "step": 52990 }, { "epoch": 5.308509039915861, "grad_norm": 2.1307921409606934, "learning_rate": 6.883907508943069e-06, "loss": 0.3771, "step": 53000 }, { "epoch": 5.309510692642861, "grad_norm": 2.107192277908325, "learning_rate": 6.876153906769148e-06, "loss": 0.4124, "step": 53010 }, { "epoch": 5.31051234536986, "grad_norm": 2.150707721710205, "learning_rate": 6.868403977321799e-06, "loss": 0.3855, "step": 53020 }, { "epoch": 5.31151399809686, "grad_norm": 2.0903549194335938, "learning_rate": 6.860657722171534e-06, "loss": 0.4535, "step": 53030 }, { "epoch": 5.312515650823859, "grad_norm": 2.294050455093384, "learning_rate": 6.852915142888108e-06, "loss": 0.4359, "step": 53040 }, { "epoch": 5.313517303550859, "grad_norm": 2.1415610313415527, "learning_rate": 6.845176241040505e-06, "loss": 0.4042, "step": 53050 }, { "epoch": 5.314518956277858, "grad_norm": 2.3479254245758057, "learning_rate": 6.83744101819698e-06, "loss": 0.4422, "step": 53060 }, { "epoch": 5.315520609004858, "grad_norm": 2.104673385620117, "learning_rate": 6.8297094759250665e-06, "loss": 0.3555, "step": 53070 }, { "epoch": 5.3165222617318575, "grad_norm": 3.1974856853485107, "learning_rate": 6.8219816157915286e-06, "loss": 0.3773, "step": 53080 }, { "epoch": 5.3175239144588575, "grad_norm": 2.344932794570923, "learning_rate": 6.814257439362368e-06, "loss": 0.3502, "step": 53090 }, { "epoch": 5.318525567185857, "grad_norm": 2.2202727794647217, "learning_rate": 6.806536948202874e-06, "loss": 0.4425, "step": 53100 }, { "epoch": 5.319527219912856, "grad_norm": 3.03460955619812, "learning_rate": 6.798820143877574e-06, "loss": 0.5061, "step": 53110 }, { "epoch": 5.320528872639856, "grad_norm": 1.9732153415679932, "learning_rate": 6.791107027950244e-06, "loss": 0.3563, "step": 53120 }, { "epoch": 5.321530525366855, "grad_norm": 2.2756731510162354, "learning_rate": 6.783397601983918e-06, "loss": 0.3928, "step": 53130 }, { "epoch": 5.322532178093855, "grad_norm": 1.9389190673828125, "learning_rate": 6.775691867540882e-06, "loss": 0.4561, "step": 53140 }, { "epoch": 5.323533830820854, "grad_norm": 2.05364727973938, "learning_rate": 6.7679898261826775e-06, "loss": 0.4577, "step": 53150 }, { "epoch": 5.324535483547854, "grad_norm": 2.402559995651245, "learning_rate": 6.760291479470074e-06, "loss": 0.3989, "step": 53160 }, { "epoch": 5.325537136274853, "grad_norm": 2.256415843963623, "learning_rate": 6.752596828963132e-06, "loss": 0.4353, "step": 53170 }, { "epoch": 5.326538789001853, "grad_norm": 2.008063793182373, "learning_rate": 6.744905876221133e-06, "loss": 0.4673, "step": 53180 }, { "epoch": 5.3275404417288525, "grad_norm": 1.8382278680801392, "learning_rate": 6.737218622802621e-06, "loss": 0.3791, "step": 53190 }, { "epoch": 5.328542094455852, "grad_norm": 3.4138710498809814, "learning_rate": 6.729535070265389e-06, "loss": 0.4359, "step": 53200 }, { "epoch": 5.329543747182852, "grad_norm": 2.198563575744629, "learning_rate": 6.721855220166479e-06, "loss": 0.4072, "step": 53210 }, { "epoch": 5.330545399909851, "grad_norm": 3.2445294857025146, "learning_rate": 6.71417907406218e-06, "loss": 0.4526, "step": 53220 }, { "epoch": 5.331547052636851, "grad_norm": 2.4834206104278564, "learning_rate": 6.706506633508033e-06, "loss": 0.4159, "step": 53230 }, { "epoch": 5.33254870536385, "grad_norm": 2.6208958625793457, "learning_rate": 6.69883790005883e-06, "loss": 0.4058, "step": 53240 }, { "epoch": 5.33355035809085, "grad_norm": 2.0602033138275146, "learning_rate": 6.691172875268609e-06, "loss": 0.3935, "step": 53250 }, { "epoch": 5.334552010817849, "grad_norm": 2.335693359375, "learning_rate": 6.683511560690658e-06, "loss": 0.4759, "step": 53260 }, { "epoch": 5.335553663544849, "grad_norm": 3.5803706645965576, "learning_rate": 6.675853957877512e-06, "loss": 0.4427, "step": 53270 }, { "epoch": 5.336555316271848, "grad_norm": 1.9978817701339722, "learning_rate": 6.668200068380953e-06, "loss": 0.4553, "step": 53280 }, { "epoch": 5.3375569689988485, "grad_norm": 2.189866065979004, "learning_rate": 6.660549893752013e-06, "loss": 0.4083, "step": 53290 }, { "epoch": 5.338558621725848, "grad_norm": 1.6272187232971191, "learning_rate": 6.652903435540972e-06, "loss": 0.373, "step": 53300 }, { "epoch": 5.339560274452847, "grad_norm": 2.4417366981506348, "learning_rate": 6.6452606952973534e-06, "loss": 0.4228, "step": 53310 }, { "epoch": 5.340561927179847, "grad_norm": 2.1256210803985596, "learning_rate": 6.637621674569925e-06, "loss": 0.4603, "step": 53320 }, { "epoch": 5.341563579906846, "grad_norm": 2.3436925411224365, "learning_rate": 6.629986374906707e-06, "loss": 0.4088, "step": 53330 }, { "epoch": 5.342565232633846, "grad_norm": 1.9916789531707764, "learning_rate": 6.622354797854965e-06, "loss": 0.4398, "step": 53340 }, { "epoch": 5.343566885360845, "grad_norm": 3.09892201423645, "learning_rate": 6.614726944961208e-06, "loss": 0.4802, "step": 53350 }, { "epoch": 5.344568538087845, "grad_norm": 2.0518476963043213, "learning_rate": 6.607102817771191e-06, "loss": 0.4275, "step": 53360 }, { "epoch": 5.345570190814844, "grad_norm": 2.4747159481048584, "learning_rate": 6.599482417829908e-06, "loss": 0.457, "step": 53370 }, { "epoch": 5.346571843541844, "grad_norm": 2.272169351577759, "learning_rate": 6.591865746681608e-06, "loss": 0.3775, "step": 53380 }, { "epoch": 5.3475734962688435, "grad_norm": 2.412099599838257, "learning_rate": 6.584252805869781e-06, "loss": 0.3946, "step": 53390 }, { "epoch": 5.3485751489958435, "grad_norm": 2.5232436656951904, "learning_rate": 6.576643596937157e-06, "loss": 0.4018, "step": 53400 }, { "epoch": 5.349576801722843, "grad_norm": 2.147260904312134, "learning_rate": 6.569038121425711e-06, "loss": 0.4361, "step": 53410 }, { "epoch": 5.350578454449842, "grad_norm": 3.0588362216949463, "learning_rate": 6.561436380876668e-06, "loss": 0.4696, "step": 53420 }, { "epoch": 5.351580107176842, "grad_norm": 2.22107195854187, "learning_rate": 6.553838376830485e-06, "loss": 0.3865, "step": 53430 }, { "epoch": 5.352581759903841, "grad_norm": 1.650066614151001, "learning_rate": 6.546244110826874e-06, "loss": 0.3675, "step": 53440 }, { "epoch": 5.353583412630841, "grad_norm": 2.833719253540039, "learning_rate": 6.5386535844047775e-06, "loss": 0.3676, "step": 53450 }, { "epoch": 5.35458506535784, "grad_norm": 1.93025803565979, "learning_rate": 6.53106679910239e-06, "loss": 0.4342, "step": 53460 }, { "epoch": 5.35558671808484, "grad_norm": 2.8691656589508057, "learning_rate": 6.523483756457144e-06, "loss": 0.4971, "step": 53470 }, { "epoch": 5.356588370811839, "grad_norm": 2.115009069442749, "learning_rate": 6.515904458005709e-06, "loss": 0.417, "step": 53480 }, { "epoch": 5.357590023538839, "grad_norm": 1.9475973844528198, "learning_rate": 6.508328905284006e-06, "loss": 0.3852, "step": 53490 }, { "epoch": 5.3585916762658385, "grad_norm": 2.8739306926727295, "learning_rate": 6.500757099827187e-06, "loss": 0.3797, "step": 53500 }, { "epoch": 5.359593328992839, "grad_norm": 2.277933359146118, "learning_rate": 6.493189043169651e-06, "loss": 0.3827, "step": 53510 }, { "epoch": 5.360594981719838, "grad_norm": 2.5828804969787598, "learning_rate": 6.485624736845031e-06, "loss": 0.3898, "step": 53520 }, { "epoch": 5.361596634446837, "grad_norm": 1.9495731592178345, "learning_rate": 6.478064182386212e-06, "loss": 0.406, "step": 53530 }, { "epoch": 5.362598287173837, "grad_norm": 1.9667346477508545, "learning_rate": 6.470507381325303e-06, "loss": 0.4068, "step": 53540 }, { "epoch": 5.363599939900836, "grad_norm": 1.973042368888855, "learning_rate": 6.462954335193666e-06, "loss": 0.4642, "step": 53550 }, { "epoch": 5.364601592627836, "grad_norm": 2.1213204860687256, "learning_rate": 6.455405045521892e-06, "loss": 0.3665, "step": 53560 }, { "epoch": 5.365603245354835, "grad_norm": 2.8752615451812744, "learning_rate": 6.4478595138398185e-06, "loss": 0.4114, "step": 53570 }, { "epoch": 5.366604898081835, "grad_norm": 1.7899949550628662, "learning_rate": 6.440317741676513e-06, "loss": 0.3377, "step": 53580 }, { "epoch": 5.367606550808834, "grad_norm": 2.2464749813079834, "learning_rate": 6.432779730560292e-06, "loss": 0.3849, "step": 53590 }, { "epoch": 5.3686082035358345, "grad_norm": 2.21551513671875, "learning_rate": 6.425245482018702e-06, "loss": 0.3827, "step": 53600 }, { "epoch": 5.369609856262834, "grad_norm": 2.1476612091064453, "learning_rate": 6.41771499757852e-06, "loss": 0.3988, "step": 53610 }, { "epoch": 5.370611508989834, "grad_norm": 2.1985154151916504, "learning_rate": 6.4101882787657916e-06, "loss": 0.4127, "step": 53620 }, { "epoch": 5.371613161716833, "grad_norm": 2.32558012008667, "learning_rate": 6.402665327105756e-06, "loss": 0.455, "step": 53630 }, { "epoch": 5.372614814443832, "grad_norm": 1.937166690826416, "learning_rate": 6.3951461441229185e-06, "loss": 0.3793, "step": 53640 }, { "epoch": 5.373616467170832, "grad_norm": 3.1271660327911377, "learning_rate": 6.38763073134101e-06, "loss": 0.4362, "step": 53650 }, { "epoch": 5.374618119897831, "grad_norm": 2.246852159500122, "learning_rate": 6.3801190902829985e-06, "loss": 0.3768, "step": 53660 }, { "epoch": 5.375619772624831, "grad_norm": 2.1570510864257812, "learning_rate": 6.372611222471092e-06, "loss": 0.3875, "step": 53670 }, { "epoch": 5.37662142535183, "grad_norm": 2.5383236408233643, "learning_rate": 6.365107129426723e-06, "loss": 0.3872, "step": 53680 }, { "epoch": 5.37762307807883, "grad_norm": 2.381948709487915, "learning_rate": 6.3576068126705855e-06, "loss": 0.3625, "step": 53690 }, { "epoch": 5.3786247308058295, "grad_norm": 2.5966947078704834, "learning_rate": 6.350110273722571e-06, "loss": 0.4216, "step": 53700 }, { "epoch": 5.3796263835328295, "grad_norm": 2.2546310424804688, "learning_rate": 6.342617514101826e-06, "loss": 0.4408, "step": 53710 }, { "epoch": 5.380628036259829, "grad_norm": 2.3283772468566895, "learning_rate": 6.335128535326726e-06, "loss": 0.4264, "step": 53720 }, { "epoch": 5.381629688986829, "grad_norm": 2.2052276134490967, "learning_rate": 6.3276433389149045e-06, "loss": 0.436, "step": 53730 }, { "epoch": 5.382631341713828, "grad_norm": 2.5837795734405518, "learning_rate": 6.320161926383186e-06, "loss": 0.423, "step": 53740 }, { "epoch": 5.383632994440827, "grad_norm": 1.8887791633605957, "learning_rate": 6.312684299247648e-06, "loss": 0.3555, "step": 53750 }, { "epoch": 5.384634647167827, "grad_norm": 2.3950350284576416, "learning_rate": 6.305210459023625e-06, "loss": 0.4179, "step": 53760 }, { "epoch": 5.385636299894826, "grad_norm": 3.177776336669922, "learning_rate": 6.297740407225638e-06, "loss": 0.4403, "step": 53770 }, { "epoch": 5.386637952621826, "grad_norm": 2.376591920852661, "learning_rate": 6.290274145367475e-06, "loss": 0.5005, "step": 53780 }, { "epoch": 5.387639605348825, "grad_norm": 2.027867078781128, "learning_rate": 6.282811674962135e-06, "loss": 0.3684, "step": 53790 }, { "epoch": 5.388641258075825, "grad_norm": 2.313084602355957, "learning_rate": 6.27535299752188e-06, "loss": 0.3768, "step": 53800 }, { "epoch": 5.3896429108028245, "grad_norm": 2.342231273651123, "learning_rate": 6.267898114558157e-06, "loss": 0.4171, "step": 53810 }, { "epoch": 5.390644563529825, "grad_norm": 2.253715991973877, "learning_rate": 6.260447027581676e-06, "loss": 0.4569, "step": 53820 }, { "epoch": 5.391646216256824, "grad_norm": 2.658900022506714, "learning_rate": 6.252999738102381e-06, "loss": 0.436, "step": 53830 }, { "epoch": 5.392647868983824, "grad_norm": 2.1993579864501953, "learning_rate": 6.245556247629436e-06, "loss": 0.3874, "step": 53840 }, { "epoch": 5.393649521710823, "grad_norm": 2.270911931991577, "learning_rate": 6.238116557671217e-06, "loss": 0.3653, "step": 53850 }, { "epoch": 5.394651174437822, "grad_norm": 2.0035555362701416, "learning_rate": 6.230680669735361e-06, "loss": 0.3763, "step": 53860 }, { "epoch": 5.395652827164822, "grad_norm": 3.234086513519287, "learning_rate": 6.223248585328734e-06, "loss": 0.4419, "step": 53870 }, { "epoch": 5.396654479891821, "grad_norm": 2.189328193664551, "learning_rate": 6.215820305957393e-06, "loss": 0.389, "step": 53880 }, { "epoch": 5.397656132618821, "grad_norm": 2.4266152381896973, "learning_rate": 6.208395833126657e-06, "loss": 0.3803, "step": 53890 }, { "epoch": 5.39865778534582, "grad_norm": 2.4305834770202637, "learning_rate": 6.200975168341081e-06, "loss": 0.3779, "step": 53900 }, { "epoch": 5.3996594380728204, "grad_norm": 2.4199655055999756, "learning_rate": 6.193558313104425e-06, "loss": 0.417, "step": 53910 }, { "epoch": 5.40066109079982, "grad_norm": 2.372927188873291, "learning_rate": 6.186145268919677e-06, "loss": 0.4141, "step": 53920 }, { "epoch": 5.40166274352682, "grad_norm": 2.0953314304351807, "learning_rate": 6.178736037289074e-06, "loss": 0.394, "step": 53930 }, { "epoch": 5.402664396253819, "grad_norm": 2.165076971054077, "learning_rate": 6.1713306197140605e-06, "loss": 0.3865, "step": 53940 }, { "epoch": 5.403666048980819, "grad_norm": 2.225846290588379, "learning_rate": 6.163929017695328e-06, "loss": 0.3702, "step": 53950 }, { "epoch": 5.404667701707818, "grad_norm": 2.3943235874176025, "learning_rate": 6.156531232732756e-06, "loss": 0.444, "step": 53960 }, { "epoch": 5.405669354434817, "grad_norm": 2.743006706237793, "learning_rate": 6.1491372663255025e-06, "loss": 0.4253, "step": 53970 }, { "epoch": 5.406671007161817, "grad_norm": 2.2035491466522217, "learning_rate": 6.141747119971925e-06, "loss": 0.4603, "step": 53980 }, { "epoch": 5.407672659888816, "grad_norm": 2.0817651748657227, "learning_rate": 6.1343607951695805e-06, "loss": 0.4126, "step": 53990 }, { "epoch": 5.408674312615816, "grad_norm": 2.3498198986053467, "learning_rate": 6.126978293415306e-06, "loss": 0.3927, "step": 54000 }, { "epoch": 5.4096759653428155, "grad_norm": 2.3940587043762207, "learning_rate": 6.1195996162051295e-06, "loss": 0.4416, "step": 54010 }, { "epoch": 5.4106776180698155, "grad_norm": 1.8981224298477173, "learning_rate": 6.112224765034316e-06, "loss": 0.3833, "step": 54020 }, { "epoch": 5.411679270796815, "grad_norm": 2.426468849182129, "learning_rate": 6.104853741397332e-06, "loss": 0.4342, "step": 54030 }, { "epoch": 5.412680923523815, "grad_norm": 3.046752452850342, "learning_rate": 6.097486546787903e-06, "loss": 0.4214, "step": 54040 }, { "epoch": 5.413682576250814, "grad_norm": 1.7482635974884033, "learning_rate": 6.090123182698965e-06, "loss": 0.4063, "step": 54050 }, { "epoch": 5.414684228977813, "grad_norm": 2.8719656467437744, "learning_rate": 6.082763650622655e-06, "loss": 0.4556, "step": 54060 }, { "epoch": 5.415685881704813, "grad_norm": 2.3082518577575684, "learning_rate": 6.075407952050374e-06, "loss": 0.4332, "step": 54070 }, { "epoch": 5.416687534431812, "grad_norm": 1.7820711135864258, "learning_rate": 6.068056088472715e-06, "loss": 0.3846, "step": 54080 }, { "epoch": 5.417689187158812, "grad_norm": 2.5791473388671875, "learning_rate": 6.060708061379516e-06, "loss": 0.4377, "step": 54090 }, { "epoch": 5.418690839885811, "grad_norm": 2.1430938243865967, "learning_rate": 6.053363872259802e-06, "loss": 0.3801, "step": 54100 }, { "epoch": 5.419692492612811, "grad_norm": 2.625718355178833, "learning_rate": 6.046023522601868e-06, "loss": 0.3715, "step": 54110 }, { "epoch": 5.4206941453398105, "grad_norm": 2.7916579246520996, "learning_rate": 6.038687013893199e-06, "loss": 0.3915, "step": 54120 }, { "epoch": 5.421695798066811, "grad_norm": 1.9555777311325073, "learning_rate": 6.031354347620508e-06, "loss": 0.3917, "step": 54130 }, { "epoch": 5.42269745079381, "grad_norm": 2.3919904232025146, "learning_rate": 6.024025525269733e-06, "loss": 0.4028, "step": 54140 }, { "epoch": 5.42369910352081, "grad_norm": 2.119253635406494, "learning_rate": 6.016700548326029e-06, "loss": 0.3997, "step": 54150 }, { "epoch": 5.424700756247809, "grad_norm": 1.9236420392990112, "learning_rate": 6.0093794182737866e-06, "loss": 0.4038, "step": 54160 }, { "epoch": 5.425702408974808, "grad_norm": 1.5495247840881348, "learning_rate": 6.002062136596578e-06, "loss": 0.3885, "step": 54170 }, { "epoch": 5.426704061701808, "grad_norm": 2.620678424835205, "learning_rate": 5.9947487047772425e-06, "loss": 0.4028, "step": 54180 }, { "epoch": 5.427705714428807, "grad_norm": 2.5425150394439697, "learning_rate": 5.987439124297814e-06, "loss": 0.4059, "step": 54190 }, { "epoch": 5.428707367155807, "grad_norm": 2.143843173980713, "learning_rate": 5.980133396639551e-06, "loss": 0.4257, "step": 54200 }, { "epoch": 5.429709019882806, "grad_norm": 2.344255208969116, "learning_rate": 5.972831523282927e-06, "loss": 0.4211, "step": 54210 }, { "epoch": 5.4307106726098064, "grad_norm": 1.8500442504882812, "learning_rate": 5.965533505707641e-06, "loss": 0.3479, "step": 54220 }, { "epoch": 5.431712325336806, "grad_norm": 1.6079330444335938, "learning_rate": 5.958239345392605e-06, "loss": 0.3915, "step": 54230 }, { "epoch": 5.432713978063806, "grad_norm": 2.237523317337036, "learning_rate": 5.950949043815956e-06, "loss": 0.4472, "step": 54240 }, { "epoch": 5.433715630790805, "grad_norm": 1.8473663330078125, "learning_rate": 5.943662602455044e-06, "loss": 0.3923, "step": 54250 }, { "epoch": 5.434717283517804, "grad_norm": 2.521827220916748, "learning_rate": 5.936380022786436e-06, "loss": 0.4243, "step": 54260 }, { "epoch": 5.435718936244804, "grad_norm": 1.908718466758728, "learning_rate": 5.929101306285919e-06, "loss": 0.3434, "step": 54270 }, { "epoch": 5.436720588971803, "grad_norm": 2.581207752227783, "learning_rate": 5.9218264544285e-06, "loss": 0.4207, "step": 54280 }, { "epoch": 5.437722241698803, "grad_norm": 2.1982924938201904, "learning_rate": 5.914555468688393e-06, "loss": 0.3784, "step": 54290 }, { "epoch": 5.438723894425802, "grad_norm": 2.5535848140716553, "learning_rate": 5.9072883505390395e-06, "loss": 0.4427, "step": 54300 }, { "epoch": 5.439725547152802, "grad_norm": 2.093137741088867, "learning_rate": 5.900025101453089e-06, "loss": 0.4415, "step": 54310 }, { "epoch": 5.4407271998798015, "grad_norm": 2.2132139205932617, "learning_rate": 5.892765722902413e-06, "loss": 0.4316, "step": 54320 }, { "epoch": 5.4417288526068015, "grad_norm": 1.9173887968063354, "learning_rate": 5.885510216358098e-06, "loss": 0.4156, "step": 54330 }, { "epoch": 5.442730505333801, "grad_norm": 1.8375977277755737, "learning_rate": 5.878258583290441e-06, "loss": 0.3712, "step": 54340 }, { "epoch": 5.443732158060801, "grad_norm": 2.173393726348877, "learning_rate": 5.871010825168957e-06, "loss": 0.3781, "step": 54350 }, { "epoch": 5.4447338107878, "grad_norm": 2.204589605331421, "learning_rate": 5.863766943462379e-06, "loss": 0.3748, "step": 54360 }, { "epoch": 5.445735463514799, "grad_norm": 2.3114230632781982, "learning_rate": 5.856526939638646e-06, "loss": 0.3875, "step": 54370 }, { "epoch": 5.446737116241799, "grad_norm": 2.2886545658111572, "learning_rate": 5.849290815164921e-06, "loss": 0.4018, "step": 54380 }, { "epoch": 5.447738768968798, "grad_norm": 2.2720205783843994, "learning_rate": 5.8420585715075744e-06, "loss": 0.4316, "step": 54390 }, { "epoch": 5.448740421695798, "grad_norm": 2.6107401847839355, "learning_rate": 5.8348302101321944e-06, "loss": 0.435, "step": 54400 }, { "epoch": 5.449742074422797, "grad_norm": 2.0635311603546143, "learning_rate": 5.827605732503577e-06, "loss": 0.4727, "step": 54410 }, { "epoch": 5.450743727149797, "grad_norm": 2.109706401824951, "learning_rate": 5.820385140085735e-06, "loss": 0.3891, "step": 54420 }, { "epoch": 5.4517453798767965, "grad_norm": 2.1004483699798584, "learning_rate": 5.813168434341898e-06, "loss": 0.4171, "step": 54430 }, { "epoch": 5.452747032603797, "grad_norm": 2.433253765106201, "learning_rate": 5.805955616734496e-06, "loss": 0.4314, "step": 54440 }, { "epoch": 5.453748685330796, "grad_norm": 1.5983706712722778, "learning_rate": 5.798746688725182e-06, "loss": 0.4282, "step": 54450 }, { "epoch": 5.454750338057796, "grad_norm": 1.6324114799499512, "learning_rate": 5.791541651774817e-06, "loss": 0.4018, "step": 54460 }, { "epoch": 5.455751990784795, "grad_norm": 2.1272900104522705, "learning_rate": 5.784340507343472e-06, "loss": 0.3705, "step": 54470 }, { "epoch": 5.456753643511794, "grad_norm": 1.5174928903579712, "learning_rate": 5.777143256890435e-06, "loss": 0.3686, "step": 54480 }, { "epoch": 5.457755296238794, "grad_norm": 2.0583887100219727, "learning_rate": 5.769949901874194e-06, "loss": 0.4044, "step": 54490 }, { "epoch": 5.458756948965793, "grad_norm": 2.4094724655151367, "learning_rate": 5.76276044375246e-06, "loss": 0.3918, "step": 54500 }, { "epoch": 5.459758601692793, "grad_norm": 3.136207103729248, "learning_rate": 5.75557488398214e-06, "loss": 0.374, "step": 54510 }, { "epoch": 5.460760254419792, "grad_norm": 1.848882794380188, "learning_rate": 5.74839322401938e-06, "loss": 0.3436, "step": 54520 }, { "epoch": 5.461761907146792, "grad_norm": 2.511730432510376, "learning_rate": 5.741215465319494e-06, "loss": 0.4469, "step": 54530 }, { "epoch": 5.462763559873792, "grad_norm": 1.784735918045044, "learning_rate": 5.7340416093370345e-06, "loss": 0.3822, "step": 54540 }, { "epoch": 5.463765212600792, "grad_norm": 2.1342051029205322, "learning_rate": 5.726871657525751e-06, "loss": 0.4132, "step": 54550 }, { "epoch": 5.464766865327791, "grad_norm": 3.0335421562194824, "learning_rate": 5.7197056113386215e-06, "loss": 0.4171, "step": 54560 }, { "epoch": 5.465768518054791, "grad_norm": 2.3769993782043457, "learning_rate": 5.712543472227797e-06, "loss": 0.3798, "step": 54570 }, { "epoch": 5.46677017078179, "grad_norm": 2.6821632385253906, "learning_rate": 5.705385241644662e-06, "loss": 0.3973, "step": 54580 }, { "epoch": 5.467771823508789, "grad_norm": 2.379179000854492, "learning_rate": 5.698230921039821e-06, "loss": 0.3875, "step": 54590 }, { "epoch": 5.468773476235789, "grad_norm": 2.252995729446411, "learning_rate": 5.691080511863051e-06, "loss": 0.3976, "step": 54600 }, { "epoch": 5.469775128962788, "grad_norm": 2.438122510910034, "learning_rate": 5.683934015563358e-06, "loss": 0.4273, "step": 54610 }, { "epoch": 5.470776781689788, "grad_norm": 2.041632652282715, "learning_rate": 5.676791433588946e-06, "loss": 0.4138, "step": 54620 }, { "epoch": 5.4717784344167875, "grad_norm": 2.2904317378997803, "learning_rate": 5.669652767387254e-06, "loss": 0.4218, "step": 54630 }, { "epoch": 5.4727800871437875, "grad_norm": 2.181467294692993, "learning_rate": 5.662518018404883e-06, "loss": 0.3501, "step": 54640 }, { "epoch": 5.473781739870787, "grad_norm": 1.8415119647979736, "learning_rate": 5.6553871880876575e-06, "loss": 0.3538, "step": 54650 }, { "epoch": 5.474783392597787, "grad_norm": 2.321047782897949, "learning_rate": 5.64826027788064e-06, "loss": 0.4236, "step": 54660 }, { "epoch": 5.475785045324786, "grad_norm": 2.017359972000122, "learning_rate": 5.641137289228049e-06, "loss": 0.4154, "step": 54670 }, { "epoch": 5.476786698051786, "grad_norm": 2.092787504196167, "learning_rate": 5.6340182235733315e-06, "loss": 0.3638, "step": 54680 }, { "epoch": 5.477788350778785, "grad_norm": 2.5516960620880127, "learning_rate": 5.626903082359139e-06, "loss": 0.4228, "step": 54690 }, { "epoch": 5.478790003505784, "grad_norm": 2.765859603881836, "learning_rate": 5.619791867027343e-06, "loss": 0.3729, "step": 54700 }, { "epoch": 5.479791656232784, "grad_norm": 1.8943431377410889, "learning_rate": 5.612684579018984e-06, "loss": 0.3901, "step": 54710 }, { "epoch": 5.480793308959783, "grad_norm": 2.0830743312835693, "learning_rate": 5.605581219774325e-06, "loss": 0.449, "step": 54720 }, { "epoch": 5.481794961686783, "grad_norm": 2.621382474899292, "learning_rate": 5.598481790732851e-06, "loss": 0.3602, "step": 54730 }, { "epoch": 5.4827966144137825, "grad_norm": 2.1246063709259033, "learning_rate": 5.591386293333231e-06, "loss": 0.4236, "step": 54740 }, { "epoch": 5.483798267140783, "grad_norm": 1.4775830507278442, "learning_rate": 5.584294729013325e-06, "loss": 0.3476, "step": 54750 }, { "epoch": 5.484799919867782, "grad_norm": 2.423421859741211, "learning_rate": 5.577207099210216e-06, "loss": 0.3928, "step": 54760 }, { "epoch": 5.485801572594782, "grad_norm": 2.1227428913116455, "learning_rate": 5.570123405360198e-06, "loss": 0.4252, "step": 54770 }, { "epoch": 5.486803225321781, "grad_norm": 2.768158435821533, "learning_rate": 5.563043648898738e-06, "loss": 0.4303, "step": 54780 }, { "epoch": 5.487804878048781, "grad_norm": 3.048062324523926, "learning_rate": 5.55596783126052e-06, "loss": 0.4353, "step": 54790 }, { "epoch": 5.48880653077578, "grad_norm": 2.053222179412842, "learning_rate": 5.548895953879443e-06, "loss": 0.4145, "step": 54800 }, { "epoch": 5.489808183502779, "grad_norm": 3.174201011657715, "learning_rate": 5.541828018188599e-06, "loss": 0.4039, "step": 54810 }, { "epoch": 5.490809836229779, "grad_norm": 1.8603386878967285, "learning_rate": 5.5347640256202595e-06, "loss": 0.4172, "step": 54820 }, { "epoch": 5.491811488956778, "grad_norm": 2.1023452281951904, "learning_rate": 5.527703977605919e-06, "loss": 0.4082, "step": 54830 }, { "epoch": 5.492813141683778, "grad_norm": 2.3197402954101562, "learning_rate": 5.520647875576279e-06, "loss": 0.3994, "step": 54840 }, { "epoch": 5.493814794410778, "grad_norm": 1.8299857378005981, "learning_rate": 5.513595720961231e-06, "loss": 0.3643, "step": 54850 }, { "epoch": 5.494816447137778, "grad_norm": 1.9272540807724, "learning_rate": 5.50654751518985e-06, "loss": 0.4442, "step": 54860 }, { "epoch": 5.495818099864777, "grad_norm": 2.175856113433838, "learning_rate": 5.499503259690442e-06, "loss": 0.3711, "step": 54870 }, { "epoch": 5.496819752591777, "grad_norm": 2.690469741821289, "learning_rate": 5.492462955890504e-06, "loss": 0.4243, "step": 54880 }, { "epoch": 5.497821405318776, "grad_norm": 2.87835693359375, "learning_rate": 5.4854266052167065e-06, "loss": 0.4216, "step": 54890 }, { "epoch": 5.498823058045776, "grad_norm": 2.505415439605713, "learning_rate": 5.478394209094942e-06, "loss": 0.4195, "step": 54900 }, { "epoch": 5.499824710772775, "grad_norm": 2.257429361343384, "learning_rate": 5.471365768950313e-06, "loss": 0.3791, "step": 54910 }, { "epoch": 5.500826363499774, "grad_norm": 1.8191579580307007, "learning_rate": 5.464341286207103e-06, "loss": 0.4309, "step": 54920 }, { "epoch": 5.501828016226774, "grad_norm": 2.082589626312256, "learning_rate": 5.4573207622887755e-06, "loss": 0.4079, "step": 54930 }, { "epoch": 5.5028296689537735, "grad_norm": 2.1472620964050293, "learning_rate": 5.450304198618034e-06, "loss": 0.3812, "step": 54940 }, { "epoch": 5.5038313216807735, "grad_norm": 2.3610379695892334, "learning_rate": 5.443291596616748e-06, "loss": 0.4051, "step": 54950 }, { "epoch": 5.504832974407773, "grad_norm": 1.5136091709136963, "learning_rate": 5.436282957706004e-06, "loss": 0.3947, "step": 54960 }, { "epoch": 5.505834627134773, "grad_norm": 2.1798224449157715, "learning_rate": 5.429278283306055e-06, "loss": 0.3891, "step": 54970 }, { "epoch": 5.506836279861772, "grad_norm": 2.1945409774780273, "learning_rate": 5.42227757483639e-06, "loss": 0.3617, "step": 54980 }, { "epoch": 5.507837932588772, "grad_norm": 1.7211298942565918, "learning_rate": 5.415280833715675e-06, "loss": 0.4423, "step": 54990 }, { "epoch": 5.508839585315771, "grad_norm": 2.1400156021118164, "learning_rate": 5.408288061361749e-06, "loss": 0.4237, "step": 55000 } ], "logging_steps": 10, "max_steps": 69888, "num_input_tokens_seen": 0, "num_train_epochs": 7, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 3.02157876235862e+21, "train_batch_size": 8, "trial_name": null, "trial_params": null }