{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 28.83922134102379, "eval_steps": 500, "global_step": 40000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.007209805335255948, "grad_norm": 2.1327059268951416, "learning_rate": 3.6e-07, "loss": 1.2212, "step": 10 }, { "epoch": 0.014419610670511895, "grad_norm": 1.382253646850586, "learning_rate": 7.6e-07, "loss": 1.2991, "step": 20 }, { "epoch": 0.021629416005767843, "grad_norm": 1.1995000839233398, "learning_rate": 1.16e-06, "loss": 1.2719, "step": 30 }, { "epoch": 0.02883922134102379, "grad_norm": 1.653165340423584, "learning_rate": 1.56e-06, "loss": 1.2673, "step": 40 }, { "epoch": 0.03604902667627974, "grad_norm": 1.3154479265213013, "learning_rate": 1.96e-06, "loss": 1.1789, "step": 50 }, { "epoch": 0.043258832011535686, "grad_norm": 1.1589637994766235, "learning_rate": 2.36e-06, "loss": 1.2281, "step": 60 }, { "epoch": 0.050468637346791634, "grad_norm": 1.451952338218689, "learning_rate": 2.7600000000000003e-06, "loss": 1.1345, "step": 70 }, { "epoch": 0.05767844268204758, "grad_norm": 1.2279274463653564, "learning_rate": 3.1600000000000007e-06, "loss": 1.0961, "step": 80 }, { "epoch": 0.06488824801730353, "grad_norm": 1.0178685188293457, "learning_rate": 3.5600000000000002e-06, "loss": 1.0325, "step": 90 }, { "epoch": 0.07209805335255948, "grad_norm": 1.6672283411026, "learning_rate": 3.96e-06, "loss": 0.993, "step": 100 }, { "epoch": 0.07930785868781542, "grad_norm": 1.4387843608856201, "learning_rate": 4.360000000000001e-06, "loss": 0.9214, "step": 110 }, { "epoch": 0.08651766402307137, "grad_norm": 1.1955718994140625, "learning_rate": 4.76e-06, "loss": 0.8239, "step": 120 }, { "epoch": 0.09372746935832732, "grad_norm": 0.8492947220802307, "learning_rate": 5.1600000000000006e-06, "loss": 0.7381, "step": 130 }, { "epoch": 0.10093727469358327, "grad_norm": 0.8091511130332947, "learning_rate": 5.56e-06, "loss": 0.7156, "step": 140 }, { "epoch": 0.10814708002883922, "grad_norm": 0.7281172871589661, "learning_rate": 5.9600000000000005e-06, "loss": 0.637, "step": 150 }, { "epoch": 0.11535688536409516, "grad_norm": 0.6586740612983704, "learning_rate": 6.360000000000001e-06, "loss": 0.5823, "step": 160 }, { "epoch": 0.12256669069935111, "grad_norm": 0.5243703126907349, "learning_rate": 6.76e-06, "loss": 0.5232, "step": 170 }, { "epoch": 0.12977649603460706, "grad_norm": 0.6083728075027466, "learning_rate": 7.16e-06, "loss": 0.501, "step": 180 }, { "epoch": 0.136986301369863, "grad_norm": 0.4979970455169678, "learning_rate": 7.5600000000000005e-06, "loss": 0.4354, "step": 190 }, { "epoch": 0.14419610670511895, "grad_norm": 0.3436925709247589, "learning_rate": 7.96e-06, "loss": 0.4176, "step": 200 }, { "epoch": 0.1514059120403749, "grad_norm": 0.4993903934955597, "learning_rate": 8.36e-06, "loss": 0.3927, "step": 210 }, { "epoch": 0.15861571737563085, "grad_norm": 0.6840378046035767, "learning_rate": 8.76e-06, "loss": 0.3883, "step": 220 }, { "epoch": 0.1658255227108868, "grad_norm": 0.5888485908508301, "learning_rate": 9.16e-06, "loss": 0.3724, "step": 230 }, { "epoch": 0.17303532804614274, "grad_norm": 0.4165538549423218, "learning_rate": 9.560000000000002e-06, "loss": 0.3362, "step": 240 }, { "epoch": 0.1802451333813987, "grad_norm": 0.416669100522995, "learning_rate": 9.96e-06, "loss": 0.3381, "step": 250 }, { "epoch": 0.18745493871665464, "grad_norm": 0.4517158567905426, "learning_rate": 1.036e-05, "loss": 0.3093, "step": 260 }, { "epoch": 0.1946647440519106, "grad_norm": 0.40463581681251526, "learning_rate": 1.076e-05, "loss": 0.317, "step": 270 }, { "epoch": 0.20187454938716654, "grad_norm": 0.5361639857292175, "learning_rate": 1.1160000000000002e-05, "loss": 0.2846, "step": 280 }, { "epoch": 0.20908435472242248, "grad_norm": 0.497188538312912, "learning_rate": 1.156e-05, "loss": 0.3001, "step": 290 }, { "epoch": 0.21629416005767843, "grad_norm": 0.6741958260536194, "learning_rate": 1.196e-05, "loss": 0.2691, "step": 300 }, { "epoch": 0.22350396539293438, "grad_norm": 0.4807916581630707, "learning_rate": 1.236e-05, "loss": 0.2636, "step": 310 }, { "epoch": 0.23071377072819033, "grad_norm": 0.42234480381011963, "learning_rate": 1.276e-05, "loss": 0.2511, "step": 320 }, { "epoch": 0.23792357606344627, "grad_norm": 0.40240347385406494, "learning_rate": 1.316e-05, "loss": 0.2462, "step": 330 }, { "epoch": 0.24513338139870222, "grad_norm": 0.48760631680488586, "learning_rate": 1.356e-05, "loss": 0.2414, "step": 340 }, { "epoch": 0.25234318673395817, "grad_norm": 0.41525670886039734, "learning_rate": 1.396e-05, "loss": 0.2355, "step": 350 }, { "epoch": 0.2595529920692141, "grad_norm": 0.5057177543640137, "learning_rate": 1.4360000000000001e-05, "loss": 0.2155, "step": 360 }, { "epoch": 0.26676279740447006, "grad_norm": 0.868471086025238, "learning_rate": 1.4760000000000001e-05, "loss": 0.2334, "step": 370 }, { "epoch": 0.273972602739726, "grad_norm": 0.6393494606018066, "learning_rate": 1.5160000000000002e-05, "loss": 0.2095, "step": 380 }, { "epoch": 0.28118240807498196, "grad_norm": 0.42319369316101074, "learning_rate": 1.556e-05, "loss": 0.2074, "step": 390 }, { "epoch": 0.2883922134102379, "grad_norm": 0.4037390649318695, "learning_rate": 1.596e-05, "loss": 0.1984, "step": 400 }, { "epoch": 0.29560201874549386, "grad_norm": 0.5208348035812378, "learning_rate": 1.636e-05, "loss": 0.1964, "step": 410 }, { "epoch": 0.3028118240807498, "grad_norm": 0.44761601090431213, "learning_rate": 1.6760000000000002e-05, "loss": 0.1879, "step": 420 }, { "epoch": 0.31002162941600575, "grad_norm": 0.38591891527175903, "learning_rate": 1.7160000000000002e-05, "loss": 0.1965, "step": 430 }, { "epoch": 0.3172314347512617, "grad_norm": 0.5054221749305725, "learning_rate": 1.756e-05, "loss": 0.1858, "step": 440 }, { "epoch": 0.32444124008651765, "grad_norm": 0.45405784249305725, "learning_rate": 1.796e-05, "loss": 0.1778, "step": 450 }, { "epoch": 0.3316510454217736, "grad_norm": 0.4895738363265991, "learning_rate": 1.8360000000000004e-05, "loss": 0.1816, "step": 460 }, { "epoch": 0.33886085075702954, "grad_norm": 0.48267173767089844, "learning_rate": 1.876e-05, "loss": 0.1785, "step": 470 }, { "epoch": 0.3460706560922855, "grad_norm": 0.7125782370567322, "learning_rate": 1.916e-05, "loss": 0.1839, "step": 480 }, { "epoch": 0.35328046142754144, "grad_norm": 0.6536968946456909, "learning_rate": 1.956e-05, "loss": 0.1671, "step": 490 }, { "epoch": 0.3604902667627974, "grad_norm": 0.5159474015235901, "learning_rate": 1.9960000000000002e-05, "loss": 0.17, "step": 500 }, { "epoch": 0.36770007209805333, "grad_norm": 0.7386950850486755, "learning_rate": 2.036e-05, "loss": 0.1793, "step": 510 }, { "epoch": 0.3749098774333093, "grad_norm": 0.6951833367347717, "learning_rate": 2.076e-05, "loss": 0.1693, "step": 520 }, { "epoch": 0.3821196827685652, "grad_norm": 0.5628671646118164, "learning_rate": 2.116e-05, "loss": 0.1716, "step": 530 }, { "epoch": 0.3893294881038212, "grad_norm": 0.4544248580932617, "learning_rate": 2.1560000000000004e-05, "loss": 0.1542, "step": 540 }, { "epoch": 0.3965392934390771, "grad_norm": 0.3949270248413086, "learning_rate": 2.196e-05, "loss": 0.16, "step": 550 }, { "epoch": 0.40374909877433307, "grad_norm": 0.49134019017219543, "learning_rate": 2.236e-05, "loss": 0.135, "step": 560 }, { "epoch": 0.410958904109589, "grad_norm": 0.36829662322998047, "learning_rate": 2.2760000000000002e-05, "loss": 0.1536, "step": 570 }, { "epoch": 0.41816870944484497, "grad_norm": 0.5665551424026489, "learning_rate": 2.3160000000000002e-05, "loss": 0.1544, "step": 580 }, { "epoch": 0.4253785147801009, "grad_norm": 0.5653684735298157, "learning_rate": 2.356e-05, "loss": 0.1607, "step": 590 }, { "epoch": 0.43258832011535686, "grad_norm": 0.5650323033332825, "learning_rate": 2.396e-05, "loss": 0.1632, "step": 600 }, { "epoch": 0.4397981254506128, "grad_norm": 0.49639394879341125, "learning_rate": 2.4360000000000004e-05, "loss": 0.1596, "step": 610 }, { "epoch": 0.44700793078586876, "grad_norm": 0.4332888126373291, "learning_rate": 2.476e-05, "loss": 0.1426, "step": 620 }, { "epoch": 0.4542177361211247, "grad_norm": 0.6475674510002136, "learning_rate": 2.516e-05, "loss": 0.1385, "step": 630 }, { "epoch": 0.46142754145638065, "grad_norm": 0.5228143930435181, "learning_rate": 2.556e-05, "loss": 0.1455, "step": 640 }, { "epoch": 0.4686373467916366, "grad_norm": 0.6459916234016418, "learning_rate": 2.5960000000000002e-05, "loss": 0.1465, "step": 650 }, { "epoch": 0.47584715212689255, "grad_norm": 0.680280864238739, "learning_rate": 2.6360000000000002e-05, "loss": 0.1518, "step": 660 }, { "epoch": 0.4830569574621485, "grad_norm": 0.5213512182235718, "learning_rate": 2.676e-05, "loss": 0.1499, "step": 670 }, { "epoch": 0.49026676279740444, "grad_norm": 0.5137337446212769, "learning_rate": 2.716e-05, "loss": 0.1533, "step": 680 }, { "epoch": 0.4974765681326604, "grad_norm": 0.7666547298431396, "learning_rate": 2.7560000000000004e-05, "loss": 0.1416, "step": 690 }, { "epoch": 0.5046863734679163, "grad_norm": 0.7990226149559021, "learning_rate": 2.7960000000000003e-05, "loss": 0.134, "step": 700 }, { "epoch": 0.5118961788031723, "grad_norm": 0.5375038981437683, "learning_rate": 2.8360000000000003e-05, "loss": 0.1605, "step": 710 }, { "epoch": 0.5191059841384282, "grad_norm": 0.46409884095191956, "learning_rate": 2.8760000000000002e-05, "loss": 0.1437, "step": 720 }, { "epoch": 0.5263157894736842, "grad_norm": 0.45672959089279175, "learning_rate": 2.9160000000000005e-05, "loss": 0.1421, "step": 730 }, { "epoch": 0.5335255948089401, "grad_norm": 0.5050258040428162, "learning_rate": 2.9559999999999998e-05, "loss": 0.1279, "step": 740 }, { "epoch": 0.5407354001441961, "grad_norm": 0.6213821172714233, "learning_rate": 2.9959999999999998e-05, "loss": 0.1402, "step": 750 }, { "epoch": 0.547945205479452, "grad_norm": 0.5678633451461792, "learning_rate": 3.036e-05, "loss": 0.1235, "step": 760 }, { "epoch": 0.555155010814708, "grad_norm": 0.5220978856086731, "learning_rate": 3.076e-05, "loss": 0.1361, "step": 770 }, { "epoch": 0.5623648161499639, "grad_norm": 0.3831532299518585, "learning_rate": 3.116e-05, "loss": 0.1347, "step": 780 }, { "epoch": 0.5695746214852199, "grad_norm": 0.5202230215072632, "learning_rate": 3.156e-05, "loss": 0.1289, "step": 790 }, { "epoch": 0.5767844268204758, "grad_norm": 0.47897499799728394, "learning_rate": 3.196e-05, "loss": 0.1371, "step": 800 }, { "epoch": 0.5839942321557318, "grad_norm": 0.6908076405525208, "learning_rate": 3.236e-05, "loss": 0.1292, "step": 810 }, { "epoch": 0.5912040374909877, "grad_norm": 0.4325861930847168, "learning_rate": 3.2760000000000005e-05, "loss": 0.123, "step": 820 }, { "epoch": 0.5984138428262437, "grad_norm": 0.4450105130672455, "learning_rate": 3.316e-05, "loss": 0.1372, "step": 830 }, { "epoch": 0.6056236481614996, "grad_norm": 0.4178124964237213, "learning_rate": 3.3560000000000004e-05, "loss": 0.1255, "step": 840 }, { "epoch": 0.6128334534967556, "grad_norm": 0.49433547258377075, "learning_rate": 3.396e-05, "loss": 0.1385, "step": 850 }, { "epoch": 0.6200432588320115, "grad_norm": 0.3447383940219879, "learning_rate": 3.436e-05, "loss": 0.1274, "step": 860 }, { "epoch": 0.6272530641672674, "grad_norm": 0.5080226063728333, "learning_rate": 3.4760000000000006e-05, "loss": 0.1318, "step": 870 }, { "epoch": 0.6344628695025234, "grad_norm": 0.47062036395072937, "learning_rate": 3.516e-05, "loss": 0.1231, "step": 880 }, { "epoch": 0.6416726748377793, "grad_norm": 0.5314539074897766, "learning_rate": 3.5560000000000005e-05, "loss": 0.113, "step": 890 }, { "epoch": 0.6488824801730353, "grad_norm": 0.6027946472167969, "learning_rate": 3.596e-05, "loss": 0.1295, "step": 900 }, { "epoch": 0.6560922855082912, "grad_norm": 0.6752146482467651, "learning_rate": 3.636e-05, "loss": 0.1167, "step": 910 }, { "epoch": 0.6633020908435472, "grad_norm": 0.4290522038936615, "learning_rate": 3.676e-05, "loss": 0.123, "step": 920 }, { "epoch": 0.6705118961788031, "grad_norm": 0.5185308456420898, "learning_rate": 3.716e-05, "loss": 0.1291, "step": 930 }, { "epoch": 0.6777217015140591, "grad_norm": 0.48521891236305237, "learning_rate": 3.756e-05, "loss": 0.1198, "step": 940 }, { "epoch": 0.684931506849315, "grad_norm": 0.7330039739608765, "learning_rate": 3.796e-05, "loss": 0.1226, "step": 950 }, { "epoch": 0.692141312184571, "grad_norm": 0.578824520111084, "learning_rate": 3.836e-05, "loss": 0.118, "step": 960 }, { "epoch": 0.6993511175198269, "grad_norm": 0.3544694483280182, "learning_rate": 3.876e-05, "loss": 0.1175, "step": 970 }, { "epoch": 0.7065609228550829, "grad_norm": 0.4823664724826813, "learning_rate": 3.9160000000000005e-05, "loss": 0.1297, "step": 980 }, { "epoch": 0.7137707281903388, "grad_norm": 0.4556851387023926, "learning_rate": 3.956e-05, "loss": 0.1249, "step": 990 }, { "epoch": 0.7209805335255948, "grad_norm": 0.7330760359764099, "learning_rate": 3.9960000000000004e-05, "loss": 0.1151, "step": 1000 }, { "epoch": 0.7281903388608507, "grad_norm": 0.6136770248413086, "learning_rate": 4.0360000000000007e-05, "loss": 0.1104, "step": 1010 }, { "epoch": 0.7354001441961067, "grad_norm": 0.4075870215892792, "learning_rate": 4.076e-05, "loss": 0.12, "step": 1020 }, { "epoch": 0.7426099495313626, "grad_norm": 0.41454657912254333, "learning_rate": 4.1160000000000006e-05, "loss": 0.1194, "step": 1030 }, { "epoch": 0.7498197548666186, "grad_norm": 0.49997684359550476, "learning_rate": 4.156e-05, "loss": 0.1207, "step": 1040 }, { "epoch": 0.7570295602018745, "grad_norm": 0.6143680214881897, "learning_rate": 4.196e-05, "loss": 0.1099, "step": 1050 }, { "epoch": 0.7642393655371305, "grad_norm": 0.4521079957485199, "learning_rate": 4.236e-05, "loss": 0.1197, "step": 1060 }, { "epoch": 0.7714491708723864, "grad_norm": 0.3707487881183624, "learning_rate": 4.276e-05, "loss": 0.1144, "step": 1070 }, { "epoch": 0.7786589762076424, "grad_norm": 0.5231669545173645, "learning_rate": 4.316e-05, "loss": 0.1211, "step": 1080 }, { "epoch": 0.7858687815428983, "grad_norm": 0.34436148405075073, "learning_rate": 4.356e-05, "loss": 0.0969, "step": 1090 }, { "epoch": 0.7930785868781542, "grad_norm": 0.6854597926139832, "learning_rate": 4.396e-05, "loss": 0.1061, "step": 1100 }, { "epoch": 0.8002883922134102, "grad_norm": 0.6877591609954834, "learning_rate": 4.436e-05, "loss": 0.1168, "step": 1110 }, { "epoch": 0.8074981975486661, "grad_norm": 0.42097362875938416, "learning_rate": 4.4760000000000005e-05, "loss": 0.1104, "step": 1120 }, { "epoch": 0.8147080028839221, "grad_norm": 0.3350059986114502, "learning_rate": 4.516e-05, "loss": 0.1187, "step": 1130 }, { "epoch": 0.821917808219178, "grad_norm": 0.6175606846809387, "learning_rate": 4.5560000000000004e-05, "loss": 0.1181, "step": 1140 }, { "epoch": 0.829127613554434, "grad_norm": 0.7081620097160339, "learning_rate": 4.596e-05, "loss": 0.1188, "step": 1150 }, { "epoch": 0.8363374188896899, "grad_norm": 0.6052496433258057, "learning_rate": 4.636e-05, "loss": 0.1076, "step": 1160 }, { "epoch": 0.8435472242249459, "grad_norm": 0.45148536562919617, "learning_rate": 4.6760000000000006e-05, "loss": 0.1113, "step": 1170 }, { "epoch": 0.8507570295602018, "grad_norm": 0.5420788526535034, "learning_rate": 4.716e-05, "loss": 0.1095, "step": 1180 }, { "epoch": 0.8579668348954578, "grad_norm": 0.44963744282722473, "learning_rate": 4.7560000000000005e-05, "loss": 0.0978, "step": 1190 }, { "epoch": 0.8651766402307137, "grad_norm": 0.4472368061542511, "learning_rate": 4.796e-05, "loss": 0.1121, "step": 1200 }, { "epoch": 0.8723864455659697, "grad_norm": 0.6222316026687622, "learning_rate": 4.836e-05, "loss": 0.1, "step": 1210 }, { "epoch": 0.8795962509012256, "grad_norm": 0.5870085954666138, "learning_rate": 4.876e-05, "loss": 0.1005, "step": 1220 }, { "epoch": 0.8868060562364816, "grad_norm": 0.5235710144042969, "learning_rate": 4.9160000000000004e-05, "loss": 0.1108, "step": 1230 }, { "epoch": 0.8940158615717375, "grad_norm": 0.6562889814376831, "learning_rate": 4.956e-05, "loss": 0.1037, "step": 1240 }, { "epoch": 0.9012256669069935, "grad_norm": 0.49944716691970825, "learning_rate": 4.996e-05, "loss": 0.115, "step": 1250 }, { "epoch": 0.9084354722422494, "grad_norm": 0.5446628332138062, "learning_rate": 5.0360000000000006e-05, "loss": 0.1027, "step": 1260 }, { "epoch": 0.9156452775775054, "grad_norm": 0.4193961024284363, "learning_rate": 5.076000000000001e-05, "loss": 0.1116, "step": 1270 }, { "epoch": 0.9228550829127613, "grad_norm": 0.6651361584663391, "learning_rate": 5.1160000000000005e-05, "loss": 0.1108, "step": 1280 }, { "epoch": 0.9300648882480173, "grad_norm": 0.4674115777015686, "learning_rate": 5.1559999999999994e-05, "loss": 0.1133, "step": 1290 }, { "epoch": 0.9372746935832732, "grad_norm": 0.4621220827102661, "learning_rate": 5.196e-05, "loss": 0.1053, "step": 1300 }, { "epoch": 0.9444844989185291, "grad_norm": 0.5915048718452454, "learning_rate": 5.236e-05, "loss": 0.1141, "step": 1310 }, { "epoch": 0.9516943042537851, "grad_norm": 0.4186863899230957, "learning_rate": 5.2759999999999996e-05, "loss": 0.1123, "step": 1320 }, { "epoch": 0.958904109589041, "grad_norm": 0.5275613069534302, "learning_rate": 5.316e-05, "loss": 0.0997, "step": 1330 }, { "epoch": 0.966113914924297, "grad_norm": 0.44258785247802734, "learning_rate": 5.356e-05, "loss": 0.1062, "step": 1340 }, { "epoch": 0.9733237202595529, "grad_norm": 0.5469958186149597, "learning_rate": 5.396e-05, "loss": 0.0997, "step": 1350 }, { "epoch": 0.9805335255948089, "grad_norm": 0.3933143615722656, "learning_rate": 5.436e-05, "loss": 0.0988, "step": 1360 }, { "epoch": 0.9877433309300648, "grad_norm": 0.39401349425315857, "learning_rate": 5.476e-05, "loss": 0.0968, "step": 1370 }, { "epoch": 0.9949531362653208, "grad_norm": 0.39958465099334717, "learning_rate": 5.516e-05, "loss": 0.1045, "step": 1380 }, { "epoch": 1.0021629416005768, "grad_norm": 0.42343318462371826, "learning_rate": 5.556e-05, "loss": 0.0968, "step": 1390 }, { "epoch": 1.0093727469358327, "grad_norm": 0.5041323304176331, "learning_rate": 5.596e-05, "loss": 0.1114, "step": 1400 }, { "epoch": 1.0165825522710887, "grad_norm": 0.4449611306190491, "learning_rate": 5.636e-05, "loss": 0.1025, "step": 1410 }, { "epoch": 1.0237923576063446, "grad_norm": 0.3080455958843231, "learning_rate": 5.6760000000000005e-05, "loss": 0.0989, "step": 1420 }, { "epoch": 1.0310021629416006, "grad_norm": 0.46148279309272766, "learning_rate": 5.716e-05, "loss": 0.1074, "step": 1430 }, { "epoch": 1.0382119682768565, "grad_norm": 0.4813390374183655, "learning_rate": 5.7560000000000005e-05, "loss": 0.1006, "step": 1440 }, { "epoch": 1.0454217736121125, "grad_norm": 0.4106311798095703, "learning_rate": 5.796e-05, "loss": 0.111, "step": 1450 }, { "epoch": 1.0526315789473684, "grad_norm": 0.47860997915267944, "learning_rate": 5.8360000000000004e-05, "loss": 0.1047, "step": 1460 }, { "epoch": 1.0598413842826244, "grad_norm": 0.3707919418811798, "learning_rate": 5.876000000000001e-05, "loss": 0.0941, "step": 1470 }, { "epoch": 1.0670511896178803, "grad_norm": 0.4452766478061676, "learning_rate": 5.916e-05, "loss": 0.1073, "step": 1480 }, { "epoch": 1.0742609949531363, "grad_norm": 0.4088212549686432, "learning_rate": 5.9560000000000006e-05, "loss": 0.0953, "step": 1490 }, { "epoch": 1.0814708002883922, "grad_norm": 0.32246363162994385, "learning_rate": 5.996e-05, "loss": 0.1008, "step": 1500 }, { "epoch": 1.0886806056236482, "grad_norm": 0.5031778812408447, "learning_rate": 6.0360000000000005e-05, "loss": 0.1043, "step": 1510 }, { "epoch": 1.095890410958904, "grad_norm": 0.49310392141342163, "learning_rate": 6.076000000000001e-05, "loss": 0.0931, "step": 1520 }, { "epoch": 1.10310021629416, "grad_norm": 0.4035817086696625, "learning_rate": 6.116e-05, "loss": 0.0959, "step": 1530 }, { "epoch": 1.110310021629416, "grad_norm": 0.5643427968025208, "learning_rate": 6.156e-05, "loss": 0.0982, "step": 1540 }, { "epoch": 1.117519826964672, "grad_norm": 0.49996694922447205, "learning_rate": 6.196000000000001e-05, "loss": 0.101, "step": 1550 }, { "epoch": 1.1247296322999278, "grad_norm": 0.49141815304756165, "learning_rate": 6.236e-05, "loss": 0.0983, "step": 1560 }, { "epoch": 1.131939437635184, "grad_norm": 0.421428382396698, "learning_rate": 6.276e-05, "loss": 0.0991, "step": 1570 }, { "epoch": 1.1391492429704397, "grad_norm": 0.6117605566978455, "learning_rate": 6.316000000000001e-05, "loss": 0.0993, "step": 1580 }, { "epoch": 1.1463590483056958, "grad_norm": 0.6001393795013428, "learning_rate": 6.356000000000001e-05, "loss": 0.0996, "step": 1590 }, { "epoch": 1.1535688536409516, "grad_norm": 0.463138222694397, "learning_rate": 6.396e-05, "loss": 0.0933, "step": 1600 }, { "epoch": 1.1607786589762077, "grad_norm": 0.5060444474220276, "learning_rate": 6.436e-05, "loss": 0.0948, "step": 1610 }, { "epoch": 1.1679884643114635, "grad_norm": 0.7089787125587463, "learning_rate": 6.476e-05, "loss": 0.1003, "step": 1620 }, { "epoch": 1.1751982696467196, "grad_norm": 0.30937638878822327, "learning_rate": 6.515999999999999e-05, "loss": 0.0915, "step": 1630 }, { "epoch": 1.1824080749819754, "grad_norm": 0.35424089431762695, "learning_rate": 6.556e-05, "loss": 0.1013, "step": 1640 }, { "epoch": 1.1896178803172315, "grad_norm": 0.4797799289226532, "learning_rate": 6.596e-05, "loss": 0.0993, "step": 1650 }, { "epoch": 1.1968276856524873, "grad_norm": 0.4629519581794739, "learning_rate": 6.636e-05, "loss": 0.0851, "step": 1660 }, { "epoch": 1.2040374909877434, "grad_norm": 0.48005327582359314, "learning_rate": 6.676e-05, "loss": 0.0894, "step": 1670 }, { "epoch": 1.2112472963229992, "grad_norm": 0.4062623381614685, "learning_rate": 6.716e-05, "loss": 0.0917, "step": 1680 }, { "epoch": 1.2184571016582553, "grad_norm": 0.5013363361358643, "learning_rate": 6.756e-05, "loss": 0.0958, "step": 1690 }, { "epoch": 1.225666906993511, "grad_norm": 0.4881376028060913, "learning_rate": 6.796e-05, "loss": 0.0925, "step": 1700 }, { "epoch": 1.2328767123287672, "grad_norm": 0.44401422142982483, "learning_rate": 6.836e-05, "loss": 0.0955, "step": 1710 }, { "epoch": 1.240086517664023, "grad_norm": 0.28849318623542786, "learning_rate": 6.876e-05, "loss": 0.096, "step": 1720 }, { "epoch": 1.247296322999279, "grad_norm": 0.38528022170066833, "learning_rate": 6.916000000000001e-05, "loss": 0.0959, "step": 1730 }, { "epoch": 1.254506128334535, "grad_norm": 0.49716585874557495, "learning_rate": 6.956e-05, "loss": 0.0921, "step": 1740 }, { "epoch": 1.261715933669791, "grad_norm": 0.46446946263313293, "learning_rate": 6.996e-05, "loss": 0.0928, "step": 1750 }, { "epoch": 1.2689257390050468, "grad_norm": 0.4602750539779663, "learning_rate": 7.036e-05, "loss": 0.0949, "step": 1760 }, { "epoch": 1.2761355443403029, "grad_norm": 0.35578811168670654, "learning_rate": 7.076000000000001e-05, "loss": 0.0907, "step": 1770 }, { "epoch": 1.2833453496755587, "grad_norm": 0.35562247037887573, "learning_rate": 7.116e-05, "loss": 0.0914, "step": 1780 }, { "epoch": 1.2905551550108147, "grad_norm": 0.4065922200679779, "learning_rate": 7.156e-05, "loss": 0.0875, "step": 1790 }, { "epoch": 1.2977649603460706, "grad_norm": 0.46271100640296936, "learning_rate": 7.196000000000001e-05, "loss": 0.0951, "step": 1800 }, { "epoch": 1.3049747656813266, "grad_norm": 0.3801787495613098, "learning_rate": 7.236e-05, "loss": 0.0849, "step": 1810 }, { "epoch": 1.3121845710165825, "grad_norm": 0.446879506111145, "learning_rate": 7.276e-05, "loss": 0.081, "step": 1820 }, { "epoch": 1.3193943763518385, "grad_norm": 0.3949533700942993, "learning_rate": 7.316000000000001e-05, "loss": 0.0882, "step": 1830 }, { "epoch": 1.3266041816870944, "grad_norm": 0.5010216236114502, "learning_rate": 7.356000000000001e-05, "loss": 0.09, "step": 1840 }, { "epoch": 1.3338139870223504, "grad_norm": 0.4303171634674072, "learning_rate": 7.396e-05, "loss": 0.0901, "step": 1850 }, { "epoch": 1.3410237923576063, "grad_norm": 0.5670498013496399, "learning_rate": 7.436000000000001e-05, "loss": 0.0959, "step": 1860 }, { "epoch": 1.3482335976928623, "grad_norm": 0.5193422436714172, "learning_rate": 7.476000000000001e-05, "loss": 0.0969, "step": 1870 }, { "epoch": 1.3554434030281182, "grad_norm": 0.4263279139995575, "learning_rate": 7.516e-05, "loss": 0.0841, "step": 1880 }, { "epoch": 1.3626532083633742, "grad_norm": 0.46615734696388245, "learning_rate": 7.556000000000002e-05, "loss": 0.0836, "step": 1890 }, { "epoch": 1.36986301369863, "grad_norm": 0.32912829518318176, "learning_rate": 7.596000000000001e-05, "loss": 0.087, "step": 1900 }, { "epoch": 1.3770728190338861, "grad_norm": 0.55960613489151, "learning_rate": 7.636e-05, "loss": 0.0914, "step": 1910 }, { "epoch": 1.384282624369142, "grad_norm": 0.35763946175575256, "learning_rate": 7.676e-05, "loss": 0.0924, "step": 1920 }, { "epoch": 1.391492429704398, "grad_norm": 0.38107943534851074, "learning_rate": 7.716e-05, "loss": 0.0909, "step": 1930 }, { "epoch": 1.3987022350396539, "grad_norm": 0.46794185042381287, "learning_rate": 7.756e-05, "loss": 0.1016, "step": 1940 }, { "epoch": 1.40591204037491, "grad_norm": 0.3489101529121399, "learning_rate": 7.796e-05, "loss": 0.0917, "step": 1950 }, { "epoch": 1.4131218457101657, "grad_norm": 0.35835880041122437, "learning_rate": 7.836e-05, "loss": 0.0808, "step": 1960 }, { "epoch": 1.4203316510454218, "grad_norm": 0.31434789299964905, "learning_rate": 7.876e-05, "loss": 0.091, "step": 1970 }, { "epoch": 1.4275414563806776, "grad_norm": 0.30311062932014465, "learning_rate": 7.916e-05, "loss": 0.09, "step": 1980 }, { "epoch": 1.4347512617159337, "grad_norm": 0.3163358271121979, "learning_rate": 7.956e-05, "loss": 0.0856, "step": 1990 }, { "epoch": 1.4419610670511895, "grad_norm": 0.3147525191307068, "learning_rate": 7.996e-05, "loss": 0.0837, "step": 2000 }, { "epoch": 1.4491708723864456, "grad_norm": 0.32424867153167725, "learning_rate": 8.036e-05, "loss": 0.0816, "step": 2010 }, { "epoch": 1.4563806777217014, "grad_norm": 0.393873929977417, "learning_rate": 8.076e-05, "loss": 0.0827, "step": 2020 }, { "epoch": 1.4635904830569575, "grad_norm": 0.3319779932498932, "learning_rate": 8.116e-05, "loss": 0.0863, "step": 2030 }, { "epoch": 1.4708002883922133, "grad_norm": 0.3582944869995117, "learning_rate": 8.156e-05, "loss": 0.0895, "step": 2040 }, { "epoch": 1.4780100937274694, "grad_norm": 0.4111061096191406, "learning_rate": 8.196000000000001e-05, "loss": 0.0878, "step": 2050 }, { "epoch": 1.4852198990627252, "grad_norm": 0.3260714113712311, "learning_rate": 8.236e-05, "loss": 0.072, "step": 2060 }, { "epoch": 1.4924297043979813, "grad_norm": 0.3016377389431, "learning_rate": 8.276e-05, "loss": 0.0764, "step": 2070 }, { "epoch": 1.4996395097332371, "grad_norm": 0.37948793172836304, "learning_rate": 8.316000000000001e-05, "loss": 0.0847, "step": 2080 }, { "epoch": 1.5068493150684932, "grad_norm": 0.3595927357673645, "learning_rate": 8.356e-05, "loss": 0.0815, "step": 2090 }, { "epoch": 1.5140591204037492, "grad_norm": 0.3666624426841736, "learning_rate": 8.396e-05, "loss": 0.0792, "step": 2100 }, { "epoch": 1.521268925739005, "grad_norm": 0.28310030698776245, "learning_rate": 8.436000000000001e-05, "loss": 0.0855, "step": 2110 }, { "epoch": 1.528478731074261, "grad_norm": 0.3029365539550781, "learning_rate": 8.476000000000001e-05, "loss": 0.0766, "step": 2120 }, { "epoch": 1.535688536409517, "grad_norm": 0.3156358301639557, "learning_rate": 8.516e-05, "loss": 0.0843, "step": 2130 }, { "epoch": 1.542898341744773, "grad_norm": 0.35296371579170227, "learning_rate": 8.556e-05, "loss": 0.0852, "step": 2140 }, { "epoch": 1.5501081470800289, "grad_norm": 0.29170548915863037, "learning_rate": 8.596000000000001e-05, "loss": 0.0841, "step": 2150 }, { "epoch": 1.5573179524152847, "grad_norm": 0.2819412052631378, "learning_rate": 8.636e-05, "loss": 0.0802, "step": 2160 }, { "epoch": 1.5645277577505408, "grad_norm": 0.2647875249385834, "learning_rate": 8.676e-05, "loss": 0.0887, "step": 2170 }, { "epoch": 1.5717375630857968, "grad_norm": 0.328716516494751, "learning_rate": 8.716000000000001e-05, "loss": 0.0826, "step": 2180 }, { "epoch": 1.5789473684210527, "grad_norm": 0.3553984463214874, "learning_rate": 8.756000000000001e-05, "loss": 0.0781, "step": 2190 }, { "epoch": 1.5861571737563085, "grad_norm": 0.37277334928512573, "learning_rate": 8.796e-05, "loss": 0.0787, "step": 2200 }, { "epoch": 1.5933669790915646, "grad_norm": 0.3770895302295685, "learning_rate": 8.836000000000001e-05, "loss": 0.0791, "step": 2210 }, { "epoch": 1.6005767844268206, "grad_norm": 0.5017049908638, "learning_rate": 8.876e-05, "loss": 0.0747, "step": 2220 }, { "epoch": 1.6077865897620764, "grad_norm": 0.3267567455768585, "learning_rate": 8.916e-05, "loss": 0.0792, "step": 2230 }, { "epoch": 1.6149963950973323, "grad_norm": 0.29710131883621216, "learning_rate": 8.956e-05, "loss": 0.0759, "step": 2240 }, { "epoch": 1.6222062004325883, "grad_norm": 0.27342262864112854, "learning_rate": 8.996e-05, "loss": 0.0865, "step": 2250 }, { "epoch": 1.6294160057678444, "grad_norm": 0.32875198125839233, "learning_rate": 9.036e-05, "loss": 0.0794, "step": 2260 }, { "epoch": 1.6366258111031002, "grad_norm": 0.2911851704120636, "learning_rate": 9.076e-05, "loss": 0.0739, "step": 2270 }, { "epoch": 1.643835616438356, "grad_norm": 0.28106454014778137, "learning_rate": 9.116e-05, "loss": 0.0812, "step": 2280 }, { "epoch": 1.6510454217736121, "grad_norm": 0.29029953479766846, "learning_rate": 9.156e-05, "loss": 0.0758, "step": 2290 }, { "epoch": 1.6582552271088682, "grad_norm": 0.2682059109210968, "learning_rate": 9.196000000000001e-05, "loss": 0.0758, "step": 2300 }, { "epoch": 1.665465032444124, "grad_norm": 0.3062179684638977, "learning_rate": 9.236e-05, "loss": 0.0849, "step": 2310 }, { "epoch": 1.6726748377793799, "grad_norm": 0.3886307179927826, "learning_rate": 9.276e-05, "loss": 0.07, "step": 2320 }, { "epoch": 1.679884643114636, "grad_norm": 0.23646529018878937, "learning_rate": 9.316000000000001e-05, "loss": 0.0824, "step": 2330 }, { "epoch": 1.687094448449892, "grad_norm": 0.31431564688682556, "learning_rate": 9.356e-05, "loss": 0.0814, "step": 2340 }, { "epoch": 1.6943042537851478, "grad_norm": 0.3371534049510956, "learning_rate": 9.396e-05, "loss": 0.0764, "step": 2350 }, { "epoch": 1.7015140591204037, "grad_norm": 0.32606765627861023, "learning_rate": 9.436e-05, "loss": 0.0776, "step": 2360 }, { "epoch": 1.7087238644556597, "grad_norm": 0.3367191255092621, "learning_rate": 9.476000000000001e-05, "loss": 0.0786, "step": 2370 }, { "epoch": 1.7159336697909158, "grad_norm": 0.3235941231250763, "learning_rate": 9.516e-05, "loss": 0.0692, "step": 2380 }, { "epoch": 1.7231434751261716, "grad_norm": 0.2255658507347107, "learning_rate": 9.556e-05, "loss": 0.0816, "step": 2390 }, { "epoch": 1.7303532804614274, "grad_norm": 0.5148633718490601, "learning_rate": 9.596000000000001e-05, "loss": 0.0841, "step": 2400 }, { "epoch": 1.7375630857966835, "grad_norm": 0.2986004948616028, "learning_rate": 9.636e-05, "loss": 0.0708, "step": 2410 }, { "epoch": 1.7447728911319396, "grad_norm": 0.3429511785507202, "learning_rate": 9.676e-05, "loss": 0.0788, "step": 2420 }, { "epoch": 1.7519826964671954, "grad_norm": 0.2788890302181244, "learning_rate": 9.716000000000001e-05, "loss": 0.0772, "step": 2430 }, { "epoch": 1.7591925018024512, "grad_norm": 0.34731703996658325, "learning_rate": 9.756000000000001e-05, "loss": 0.079, "step": 2440 }, { "epoch": 1.7664023071377073, "grad_norm": 0.34628552198410034, "learning_rate": 9.796e-05, "loss": 0.0768, "step": 2450 }, { "epoch": 1.7736121124729634, "grad_norm": 0.24145205318927765, "learning_rate": 9.836000000000001e-05, "loss": 0.0764, "step": 2460 }, { "epoch": 1.7808219178082192, "grad_norm": 0.30319681763648987, "learning_rate": 9.876000000000001e-05, "loss": 0.0736, "step": 2470 }, { "epoch": 1.788031723143475, "grad_norm": 0.3288049101829529, "learning_rate": 9.916e-05, "loss": 0.0757, "step": 2480 }, { "epoch": 1.795241528478731, "grad_norm": 0.24683639407157898, "learning_rate": 9.956e-05, "loss": 0.0736, "step": 2490 }, { "epoch": 1.8024513338139871, "grad_norm": 0.2636137008666992, "learning_rate": 9.996000000000001e-05, "loss": 0.075, "step": 2500 }, { "epoch": 1.809661139149243, "grad_norm": 0.3855935037136078, "learning_rate": 9.999999114196196e-05, "loss": 0.0772, "step": 2510 }, { "epoch": 1.8168709444844988, "grad_norm": 0.3241812288761139, "learning_rate": 9.99999605215876e-05, "loss": 0.0722, "step": 2520 }, { "epoch": 1.8240807498197549, "grad_norm": 0.3677128553390503, "learning_rate": 9.999990802953179e-05, "loss": 0.0678, "step": 2530 }, { "epoch": 1.831290555155011, "grad_norm": 0.32813936471939087, "learning_rate": 9.99998336658175e-05, "loss": 0.0727, "step": 2540 }, { "epoch": 1.8385003604902668, "grad_norm": 0.25153809785842896, "learning_rate": 9.999973743047727e-05, "loss": 0.0787, "step": 2550 }, { "epoch": 1.8457101658255226, "grad_norm": 0.26241084933280945, "learning_rate": 9.999961932355319e-05, "loss": 0.077, "step": 2560 }, { "epoch": 1.8529199711607787, "grad_norm": 0.2777637243270874, "learning_rate": 9.999947934509693e-05, "loss": 0.0695, "step": 2570 }, { "epoch": 1.8601297764960347, "grad_norm": 0.3044157922267914, "learning_rate": 9.999931749516971e-05, "loss": 0.0718, "step": 2580 }, { "epoch": 1.8673395818312906, "grad_norm": 0.2749534249305725, "learning_rate": 9.999913377384233e-05, "loss": 0.0787, "step": 2590 }, { "epoch": 1.8745493871665464, "grad_norm": 0.2633066475391388, "learning_rate": 9.999892818119517e-05, "loss": 0.0718, "step": 2600 }, { "epoch": 1.8817591925018025, "grad_norm": 0.3488597869873047, "learning_rate": 9.999870071731814e-05, "loss": 0.0651, "step": 2610 }, { "epoch": 1.8889689978370585, "grad_norm": 0.27390459179878235, "learning_rate": 9.999845138231076e-05, "loss": 0.0746, "step": 2620 }, { "epoch": 1.8961788031723144, "grad_norm": 0.2843882739543915, "learning_rate": 9.999818017628208e-05, "loss": 0.0685, "step": 2630 }, { "epoch": 1.9033886085075702, "grad_norm": 0.28818389773368835, "learning_rate": 9.999788709935078e-05, "loss": 0.0673, "step": 2640 }, { "epoch": 1.9105984138428262, "grad_norm": 0.2801273465156555, "learning_rate": 9.9997572151645e-05, "loss": 0.0764, "step": 2650 }, { "epoch": 1.9178082191780823, "grad_norm": 0.3741348087787628, "learning_rate": 9.999723533330254e-05, "loss": 0.0778, "step": 2660 }, { "epoch": 1.9250180245133381, "grad_norm": 0.2770523726940155, "learning_rate": 9.999687664447074e-05, "loss": 0.0777, "step": 2670 }, { "epoch": 1.932227829848594, "grad_norm": 0.5473715662956238, "learning_rate": 9.99964960853065e-05, "loss": 0.0725, "step": 2680 }, { "epoch": 1.93943763518385, "grad_norm": 0.32351815700531006, "learning_rate": 9.999609365597627e-05, "loss": 0.0752, "step": 2690 }, { "epoch": 1.946647440519106, "grad_norm": 0.223007470369339, "learning_rate": 9.99956693566561e-05, "loss": 0.08, "step": 2700 }, { "epoch": 1.953857245854362, "grad_norm": 0.2653423845767975, "learning_rate": 9.99952231875316e-05, "loss": 0.075, "step": 2710 }, { "epoch": 1.9610670511896178, "grad_norm": 0.2597029209136963, "learning_rate": 9.999475514879795e-05, "loss": 0.0696, "step": 2720 }, { "epoch": 1.9682768565248738, "grad_norm": 0.25020116567611694, "learning_rate": 9.999426524065984e-05, "loss": 0.0683, "step": 2730 }, { "epoch": 1.9754866618601299, "grad_norm": 0.22979138791561127, "learning_rate": 9.999375346333162e-05, "loss": 0.0801, "step": 2740 }, { "epoch": 1.9826964671953857, "grad_norm": 0.3176663815975189, "learning_rate": 9.999321981703715e-05, "loss": 0.0671, "step": 2750 }, { "epoch": 1.9899062725306416, "grad_norm": 0.25713658332824707, "learning_rate": 9.999266430200985e-05, "loss": 0.0676, "step": 2760 }, { "epoch": 1.9971160778658976, "grad_norm": 0.3584665060043335, "learning_rate": 9.999208691849271e-05, "loss": 0.0698, "step": 2770 }, { "epoch": 2.0043258832011537, "grad_norm": 0.3240790069103241, "learning_rate": 9.999148766673832e-05, "loss": 0.079, "step": 2780 }, { "epoch": 2.0115356885364095, "grad_norm": 0.3667881190776825, "learning_rate": 9.999086654700881e-05, "loss": 0.066, "step": 2790 }, { "epoch": 2.0187454938716654, "grad_norm": 0.26150766015052795, "learning_rate": 9.999022355957588e-05, "loss": 0.072, "step": 2800 }, { "epoch": 2.025955299206921, "grad_norm": 0.28082677721977234, "learning_rate": 9.998955870472079e-05, "loss": 0.0733, "step": 2810 }, { "epoch": 2.0331651045421775, "grad_norm": 0.2802099883556366, "learning_rate": 9.998887198273437e-05, "loss": 0.063, "step": 2820 }, { "epoch": 2.0403749098774333, "grad_norm": 0.2714148163795471, "learning_rate": 9.998816339391701e-05, "loss": 0.0758, "step": 2830 }, { "epoch": 2.047584715212689, "grad_norm": 0.3184826374053955, "learning_rate": 9.998743293857868e-05, "loss": 0.0693, "step": 2840 }, { "epoch": 2.0547945205479454, "grad_norm": 0.30280905961990356, "learning_rate": 9.998668061703891e-05, "loss": 0.0675, "step": 2850 }, { "epoch": 2.0620043258832013, "grad_norm": 0.24647052586078644, "learning_rate": 9.998590642962679e-05, "loss": 0.0716, "step": 2860 }, { "epoch": 2.069214131218457, "grad_norm": 0.28467875719070435, "learning_rate": 9.998511037668095e-05, "loss": 0.0664, "step": 2870 }, { "epoch": 2.076423936553713, "grad_norm": 0.3332025408744812, "learning_rate": 9.998429245854964e-05, "loss": 0.0655, "step": 2880 }, { "epoch": 2.0836337418889688, "grad_norm": 0.24699394404888153, "learning_rate": 9.998345267559064e-05, "loss": 0.0665, "step": 2890 }, { "epoch": 2.090843547224225, "grad_norm": 0.21133895218372345, "learning_rate": 9.998259102817129e-05, "loss": 0.0657, "step": 2900 }, { "epoch": 2.098053352559481, "grad_norm": 0.2216673642396927, "learning_rate": 9.99817075166685e-05, "loss": 0.0763, "step": 2910 }, { "epoch": 2.1052631578947367, "grad_norm": 0.2703382670879364, "learning_rate": 9.998080214146878e-05, "loss": 0.0703, "step": 2920 }, { "epoch": 2.112472963229993, "grad_norm": 0.2474026381969452, "learning_rate": 9.997987490296813e-05, "loss": 0.0642, "step": 2930 }, { "epoch": 2.119682768565249, "grad_norm": 0.2147219032049179, "learning_rate": 9.99789258015722e-05, "loss": 0.0655, "step": 2940 }, { "epoch": 2.1268925739005047, "grad_norm": 0.298904150724411, "learning_rate": 9.997795483769611e-05, "loss": 0.065, "step": 2950 }, { "epoch": 2.1341023792357605, "grad_norm": 0.262148916721344, "learning_rate": 9.997696201176462e-05, "loss": 0.0753, "step": 2960 }, { "epoch": 2.1413121845710164, "grad_norm": 0.3388700485229492, "learning_rate": 9.997594732421203e-05, "loss": 0.0674, "step": 2970 }, { "epoch": 2.1485219899062726, "grad_norm": 0.2720179855823517, "learning_rate": 9.997491077548217e-05, "loss": 0.0697, "step": 2980 }, { "epoch": 2.1557317952415285, "grad_norm": 0.32596221566200256, "learning_rate": 9.997385236602851e-05, "loss": 0.0659, "step": 2990 }, { "epoch": 2.1629416005767843, "grad_norm": 0.3896872103214264, "learning_rate": 9.997277209631399e-05, "loss": 0.0711, "step": 3000 }, { "epoch": 2.1701514059120406, "grad_norm": 0.2607218623161316, "learning_rate": 9.997166996681118e-05, "loss": 0.0688, "step": 3010 }, { "epoch": 2.1773612112472964, "grad_norm": 0.2212297022342682, "learning_rate": 9.997054597800218e-05, "loss": 0.0741, "step": 3020 }, { "epoch": 2.1845710165825523, "grad_norm": 0.3323672115802765, "learning_rate": 9.996940013037866e-05, "loss": 0.0603, "step": 3030 }, { "epoch": 2.191780821917808, "grad_norm": 0.23356731235980988, "learning_rate": 9.996823242444186e-05, "loss": 0.0658, "step": 3040 }, { "epoch": 2.198990627253064, "grad_norm": 0.21355707943439484, "learning_rate": 9.996704286070258e-05, "loss": 0.0624, "step": 3050 }, { "epoch": 2.20620043258832, "grad_norm": 0.26965734362602234, "learning_rate": 9.996583143968115e-05, "loss": 0.0695, "step": 3060 }, { "epoch": 2.213410237923576, "grad_norm": 0.20256662368774414, "learning_rate": 9.99645981619075e-05, "loss": 0.0731, "step": 3070 }, { "epoch": 2.220620043258832, "grad_norm": 0.30516695976257324, "learning_rate": 9.996334302792114e-05, "loss": 0.0712, "step": 3080 }, { "epoch": 2.227829848594088, "grad_norm": 0.2361900806427002, "learning_rate": 9.996206603827105e-05, "loss": 0.0651, "step": 3090 }, { "epoch": 2.235039653929344, "grad_norm": 0.23828858137130737, "learning_rate": 9.996076719351587e-05, "loss": 0.0635, "step": 3100 }, { "epoch": 2.2422494592646, "grad_norm": 0.2296685427427292, "learning_rate": 9.995944649422374e-05, "loss": 0.0755, "step": 3110 }, { "epoch": 2.2494592645998557, "grad_norm": 0.24688735604286194, "learning_rate": 9.995810394097239e-05, "loss": 0.0639, "step": 3120 }, { "epoch": 2.2566690699351115, "grad_norm": 0.24765750765800476, "learning_rate": 9.995673953434909e-05, "loss": 0.0704, "step": 3130 }, { "epoch": 2.263878875270368, "grad_norm": 0.24908362329006195, "learning_rate": 9.995535327495068e-05, "loss": 0.0691, "step": 3140 }, { "epoch": 2.2710886806056236, "grad_norm": 0.194735586643219, "learning_rate": 9.995394516338355e-05, "loss": 0.0685, "step": 3150 }, { "epoch": 2.2782984859408795, "grad_norm": 0.3269915282726288, "learning_rate": 9.995251520026367e-05, "loss": 0.0616, "step": 3160 }, { "epoch": 2.2855082912761358, "grad_norm": 0.20556575059890747, "learning_rate": 9.995106338621656e-05, "loss": 0.0669, "step": 3170 }, { "epoch": 2.2927180966113916, "grad_norm": 0.226836696267128, "learning_rate": 9.994958972187726e-05, "loss": 0.0604, "step": 3180 }, { "epoch": 2.2999279019466474, "grad_norm": 0.22157134115695953, "learning_rate": 9.994809420789044e-05, "loss": 0.0601, "step": 3190 }, { "epoch": 2.3071377072819033, "grad_norm": 0.23397281765937805, "learning_rate": 9.994657684491027e-05, "loss": 0.0698, "step": 3200 }, { "epoch": 2.314347512617159, "grad_norm": 0.24023570120334625, "learning_rate": 9.994503763360048e-05, "loss": 0.0684, "step": 3210 }, { "epoch": 2.3215573179524154, "grad_norm": 0.3105556070804596, "learning_rate": 9.99434765746344e-05, "loss": 0.0656, "step": 3220 }, { "epoch": 2.328767123287671, "grad_norm": 0.17717993259429932, "learning_rate": 9.994189366869488e-05, "loss": 0.0626, "step": 3230 }, { "epoch": 2.335976928622927, "grad_norm": 0.2200339287519455, "learning_rate": 9.994028891647433e-05, "loss": 0.0607, "step": 3240 }, { "epoch": 2.3431867339581833, "grad_norm": 0.2898266017436981, "learning_rate": 9.993866231867475e-05, "loss": 0.058, "step": 3250 }, { "epoch": 2.350396539293439, "grad_norm": 0.22649803757667542, "learning_rate": 9.993701387600762e-05, "loss": 0.0671, "step": 3260 }, { "epoch": 2.357606344628695, "grad_norm": 0.2026831954717636, "learning_rate": 9.993534358919408e-05, "loss": 0.0609, "step": 3270 }, { "epoch": 2.364816149963951, "grad_norm": 0.23497985303401947, "learning_rate": 9.993365145896473e-05, "loss": 0.0569, "step": 3280 }, { "epoch": 2.3720259552992067, "grad_norm": 0.17127367854118347, "learning_rate": 9.993193748605977e-05, "loss": 0.0618, "step": 3290 }, { "epoch": 2.379235760634463, "grad_norm": 0.21610863506793976, "learning_rate": 9.993020167122898e-05, "loss": 0.0557, "step": 3300 }, { "epoch": 2.386445565969719, "grad_norm": 0.20904548466205597, "learning_rate": 9.992844401523164e-05, "loss": 0.0598, "step": 3310 }, { "epoch": 2.3936553713049746, "grad_norm": 0.22532647848129272, "learning_rate": 9.992666451883661e-05, "loss": 0.0626, "step": 3320 }, { "epoch": 2.400865176640231, "grad_norm": 0.18803730607032776, "learning_rate": 9.99248631828223e-05, "loss": 0.0592, "step": 3330 }, { "epoch": 2.4080749819754868, "grad_norm": 0.18418768048286438, "learning_rate": 9.99230400079767e-05, "loss": 0.0638, "step": 3340 }, { "epoch": 2.4152847873107426, "grad_norm": 0.19752810895442963, "learning_rate": 9.992119499509728e-05, "loss": 0.0691, "step": 3350 }, { "epoch": 2.4224945926459984, "grad_norm": 0.22758257389068604, "learning_rate": 9.991932814499114e-05, "loss": 0.0621, "step": 3360 }, { "epoch": 2.4297043979812543, "grad_norm": 0.21637767553329468, "learning_rate": 9.991743945847493e-05, "loss": 0.0696, "step": 3370 }, { "epoch": 2.4369142033165105, "grad_norm": 0.1820373386144638, "learning_rate": 9.991552893637478e-05, "loss": 0.0664, "step": 3380 }, { "epoch": 2.4441240086517664, "grad_norm": 0.250823050737381, "learning_rate": 9.991359657952644e-05, "loss": 0.0615, "step": 3390 }, { "epoch": 2.451333813987022, "grad_norm": 0.26243042945861816, "learning_rate": 9.991164238877519e-05, "loss": 0.0726, "step": 3400 }, { "epoch": 2.4585436193222785, "grad_norm": 0.26430773735046387, "learning_rate": 9.990966636497585e-05, "loss": 0.069, "step": 3410 }, { "epoch": 2.4657534246575343, "grad_norm": 0.2767289876937866, "learning_rate": 9.99076685089928e-05, "loss": 0.0681, "step": 3420 }, { "epoch": 2.47296322999279, "grad_norm": 0.2523096799850464, "learning_rate": 9.990564882169998e-05, "loss": 0.0606, "step": 3430 }, { "epoch": 2.480173035328046, "grad_norm": 0.2191690057516098, "learning_rate": 9.990360730398088e-05, "loss": 0.0578, "step": 3440 }, { "epoch": 2.487382840663302, "grad_norm": 0.2737160623073578, "learning_rate": 9.990154395672849e-05, "loss": 0.0643, "step": 3450 }, { "epoch": 2.494592645998558, "grad_norm": 0.2818142771720886, "learning_rate": 9.989945878084541e-05, "loss": 0.0704, "step": 3460 }, { "epoch": 2.501802451333814, "grad_norm": 0.17685405910015106, "learning_rate": 9.989735177724378e-05, "loss": 0.0585, "step": 3470 }, { "epoch": 2.50901225666907, "grad_norm": 0.20097985863685608, "learning_rate": 9.989522294684526e-05, "loss": 0.0653, "step": 3480 }, { "epoch": 2.516222062004326, "grad_norm": 0.2549450695514679, "learning_rate": 9.989307229058107e-05, "loss": 0.0605, "step": 3490 }, { "epoch": 2.523431867339582, "grad_norm": 0.25028911232948303, "learning_rate": 9.989089980939202e-05, "loss": 0.0738, "step": 3500 }, { "epoch": 2.5306416726748377, "grad_norm": 0.17094382643699646, "learning_rate": 9.988870550422835e-05, "loss": 0.0618, "step": 3510 }, { "epoch": 2.5378514780100936, "grad_norm": 0.2370288372039795, "learning_rate": 9.988648937604999e-05, "loss": 0.0635, "step": 3520 }, { "epoch": 2.5450612833453494, "grad_norm": 0.23692220449447632, "learning_rate": 9.988425142582632e-05, "loss": 0.0624, "step": 3530 }, { "epoch": 2.5522710886806057, "grad_norm": 0.19178417325019836, "learning_rate": 9.98819916545363e-05, "loss": 0.06, "step": 3540 }, { "epoch": 2.5594808940158615, "grad_norm": 0.2906595468521118, "learning_rate": 9.987971006316844e-05, "loss": 0.0662, "step": 3550 }, { "epoch": 2.5666906993511174, "grad_norm": 0.20351345837116241, "learning_rate": 9.987740665272077e-05, "loss": 0.0613, "step": 3560 }, { "epoch": 2.5739005046863737, "grad_norm": 0.21369053423404694, "learning_rate": 9.98750814242009e-05, "loss": 0.0712, "step": 3570 }, { "epoch": 2.5811103100216295, "grad_norm": 0.22824737429618835, "learning_rate": 9.987273437862594e-05, "loss": 0.0661, "step": 3580 }, { "epoch": 2.5883201153568853, "grad_norm": 0.3017346262931824, "learning_rate": 9.987036551702259e-05, "loss": 0.0574, "step": 3590 }, { "epoch": 2.595529920692141, "grad_norm": 0.313907653093338, "learning_rate": 9.986797484042706e-05, "loss": 0.0658, "step": 3600 }, { "epoch": 2.602739726027397, "grad_norm": 0.23212425410747528, "learning_rate": 9.986556234988512e-05, "loss": 0.0599, "step": 3610 }, { "epoch": 2.6099495313626533, "grad_norm": 0.22303499281406403, "learning_rate": 9.986312804645205e-05, "loss": 0.0621, "step": 3620 }, { "epoch": 2.617159336697909, "grad_norm": 0.25346678495407104, "learning_rate": 9.986067193119273e-05, "loss": 0.0616, "step": 3630 }, { "epoch": 2.624369142033165, "grad_norm": 0.20805665850639343, "learning_rate": 9.985819400518153e-05, "loss": 0.0653, "step": 3640 }, { "epoch": 2.6315789473684212, "grad_norm": 0.17589031159877777, "learning_rate": 9.985569426950239e-05, "loss": 0.0602, "step": 3650 }, { "epoch": 2.638788752703677, "grad_norm": 0.38030511140823364, "learning_rate": 9.985317272524876e-05, "loss": 0.0557, "step": 3660 }, { "epoch": 2.645998558038933, "grad_norm": 0.22201913595199585, "learning_rate": 9.985062937352366e-05, "loss": 0.0593, "step": 3670 }, { "epoch": 2.6532083633741887, "grad_norm": 0.2130642831325531, "learning_rate": 9.984806421543966e-05, "loss": 0.0673, "step": 3680 }, { "epoch": 2.6604181687094446, "grad_norm": 0.21566620469093323, "learning_rate": 9.984547725211881e-05, "loss": 0.0608, "step": 3690 }, { "epoch": 2.667627974044701, "grad_norm": 0.2523733079433441, "learning_rate": 9.984286848469276e-05, "loss": 0.0559, "step": 3700 }, { "epoch": 2.6748377793799567, "grad_norm": 0.23533882200717926, "learning_rate": 9.984023791430266e-05, "loss": 0.0619, "step": 3710 }, { "epoch": 2.6820475847152125, "grad_norm": 0.23202048242092133, "learning_rate": 9.983758554209924e-05, "loss": 0.0607, "step": 3720 }, { "epoch": 2.689257390050469, "grad_norm": 0.18658657371997833, "learning_rate": 9.983491136924268e-05, "loss": 0.0547, "step": 3730 }, { "epoch": 2.6964671953857247, "grad_norm": 0.2126341611146927, "learning_rate": 9.983221539690282e-05, "loss": 0.0667, "step": 3740 }, { "epoch": 2.7036770007209805, "grad_norm": 0.22038418054580688, "learning_rate": 9.982949762625892e-05, "loss": 0.0598, "step": 3750 }, { "epoch": 2.7108868060562363, "grad_norm": 0.2968807816505432, "learning_rate": 9.982675805849986e-05, "loss": 0.0586, "step": 3760 }, { "epoch": 2.718096611391492, "grad_norm": 0.23230478167533875, "learning_rate": 9.982399669482399e-05, "loss": 0.0641, "step": 3770 }, { "epoch": 2.7253064167267484, "grad_norm": 0.2075739949941635, "learning_rate": 9.982121353643924e-05, "loss": 0.0633, "step": 3780 }, { "epoch": 2.7325162220620043, "grad_norm": 0.21806077659130096, "learning_rate": 9.981840858456306e-05, "loss": 0.0542, "step": 3790 }, { "epoch": 2.73972602739726, "grad_norm": 0.2921372354030609, "learning_rate": 9.981558184042243e-05, "loss": 0.0605, "step": 3800 }, { "epoch": 2.7469358327325164, "grad_norm": 0.3022048771381378, "learning_rate": 9.981273330525387e-05, "loss": 0.0579, "step": 3810 }, { "epoch": 2.7541456380677722, "grad_norm": 0.23837485909461975, "learning_rate": 9.980986298030341e-05, "loss": 0.0647, "step": 3820 }, { "epoch": 2.761355443403028, "grad_norm": 0.1392785906791687, "learning_rate": 9.980697086682662e-05, "loss": 0.0577, "step": 3830 }, { "epoch": 2.768565248738284, "grad_norm": 0.25320830941200256, "learning_rate": 9.980405696608866e-05, "loss": 0.0605, "step": 3840 }, { "epoch": 2.7757750540735397, "grad_norm": 0.2515171766281128, "learning_rate": 9.980112127936409e-05, "loss": 0.0545, "step": 3850 }, { "epoch": 2.782984859408796, "grad_norm": 0.2511722445487976, "learning_rate": 9.979816380793717e-05, "loss": 0.0708, "step": 3860 }, { "epoch": 2.790194664744052, "grad_norm": 0.22062349319458008, "learning_rate": 9.979518455310151e-05, "loss": 0.065, "step": 3870 }, { "epoch": 2.7974044700793077, "grad_norm": 0.2547611594200134, "learning_rate": 9.97921835161604e-05, "loss": 0.0623, "step": 3880 }, { "epoch": 2.804614275414564, "grad_norm": 0.19483141601085663, "learning_rate": 9.978916069842656e-05, "loss": 0.0578, "step": 3890 }, { "epoch": 2.81182408074982, "grad_norm": 0.23989924788475037, "learning_rate": 9.97861161012223e-05, "loss": 0.0599, "step": 3900 }, { "epoch": 2.8190338860850757, "grad_norm": 0.20758624374866486, "learning_rate": 9.978304972587942e-05, "loss": 0.0636, "step": 3910 }, { "epoch": 2.8262436914203315, "grad_norm": 0.20391494035720825, "learning_rate": 9.977996157373925e-05, "loss": 0.061, "step": 3920 }, { "epoch": 2.8334534967555873, "grad_norm": 0.22325961291790009, "learning_rate": 9.977685164615265e-05, "loss": 0.0613, "step": 3930 }, { "epoch": 2.8406633020908436, "grad_norm": 0.2591201066970825, "learning_rate": 9.977371994448002e-05, "loss": 0.0616, "step": 3940 }, { "epoch": 2.8478731074260994, "grad_norm": 0.210562065243721, "learning_rate": 9.977056647009127e-05, "loss": 0.0633, "step": 3950 }, { "epoch": 2.8550829127613553, "grad_norm": 0.16298910975456238, "learning_rate": 9.976739122436582e-05, "loss": 0.0545, "step": 3960 }, { "epoch": 2.8622927180966116, "grad_norm": 0.18886180222034454, "learning_rate": 9.976419420869265e-05, "loss": 0.056, "step": 3970 }, { "epoch": 2.8695025234318674, "grad_norm": 0.1541513055562973, "learning_rate": 9.976097542447025e-05, "loss": 0.0526, "step": 3980 }, { "epoch": 2.8767123287671232, "grad_norm": 0.24827978014945984, "learning_rate": 9.97577348731066e-05, "loss": 0.0524, "step": 3990 }, { "epoch": 2.883922134102379, "grad_norm": 0.15992240607738495, "learning_rate": 9.975447255601927e-05, "loss": 0.0569, "step": 4000 }, { "epoch": 2.891131939437635, "grad_norm": 0.18680189549922943, "learning_rate": 9.975118847463525e-05, "loss": 0.0575, "step": 4010 }, { "epoch": 2.898341744772891, "grad_norm": 0.19612041115760803, "learning_rate": 9.974788263039114e-05, "loss": 0.0558, "step": 4020 }, { "epoch": 2.905551550108147, "grad_norm": 0.28335222601890564, "learning_rate": 9.974455502473303e-05, "loss": 0.0689, "step": 4030 }, { "epoch": 2.912761355443403, "grad_norm": 0.23546543717384338, "learning_rate": 9.974120565911652e-05, "loss": 0.0655, "step": 4040 }, { "epoch": 2.919971160778659, "grad_norm": 0.2855571508407593, "learning_rate": 9.973783453500674e-05, "loss": 0.0589, "step": 4050 }, { "epoch": 2.927180966113915, "grad_norm": 0.2420112043619156, "learning_rate": 9.973444165387835e-05, "loss": 0.063, "step": 4060 }, { "epoch": 2.934390771449171, "grad_norm": 0.23671653866767883, "learning_rate": 9.973102701721549e-05, "loss": 0.0583, "step": 4070 }, { "epoch": 2.9416005767844267, "grad_norm": 0.2503320574760437, "learning_rate": 9.972759062651184e-05, "loss": 0.0545, "step": 4080 }, { "epoch": 2.9488103821196825, "grad_norm": 0.22277942299842834, "learning_rate": 9.972413248327059e-05, "loss": 0.0533, "step": 4090 }, { "epoch": 2.9560201874549388, "grad_norm": 0.1996232271194458, "learning_rate": 9.972065258900447e-05, "loss": 0.0631, "step": 4100 }, { "epoch": 2.9632299927901946, "grad_norm": 0.2225467562675476, "learning_rate": 9.971715094523569e-05, "loss": 0.0617, "step": 4110 }, { "epoch": 2.9704397981254504, "grad_norm": 0.16490429639816284, "learning_rate": 9.971362755349598e-05, "loss": 0.0573, "step": 4120 }, { "epoch": 2.9776496034607067, "grad_norm": 0.28306251764297485, "learning_rate": 9.971008241532662e-05, "loss": 0.0594, "step": 4130 }, { "epoch": 2.9848594087959626, "grad_norm": 0.19434769451618195, "learning_rate": 9.970651553227835e-05, "loss": 0.0592, "step": 4140 }, { "epoch": 2.9920692141312184, "grad_norm": 0.20891642570495605, "learning_rate": 9.970292690591143e-05, "loss": 0.059, "step": 4150 }, { "epoch": 2.9992790194664742, "grad_norm": 0.18845438957214355, "learning_rate": 9.969931653779569e-05, "loss": 0.0517, "step": 4160 }, { "epoch": 3.0064888248017305, "grad_norm": 0.19494499266147614, "learning_rate": 9.969568442951038e-05, "loss": 0.0606, "step": 4170 }, { "epoch": 3.0136986301369864, "grad_norm": 0.23412065207958221, "learning_rate": 9.969203058264436e-05, "loss": 0.054, "step": 4180 }, { "epoch": 3.020908435472242, "grad_norm": 0.16418196260929108, "learning_rate": 9.96883549987959e-05, "loss": 0.0689, "step": 4190 }, { "epoch": 3.028118240807498, "grad_norm": 0.2205539047718048, "learning_rate": 9.968465767957287e-05, "loss": 0.0481, "step": 4200 }, { "epoch": 3.0353280461427543, "grad_norm": 0.23119905591011047, "learning_rate": 9.968093862659256e-05, "loss": 0.0581, "step": 4210 }, { "epoch": 3.04253785147801, "grad_norm": 0.2198716551065445, "learning_rate": 9.967719784148182e-05, "loss": 0.052, "step": 4220 }, { "epoch": 3.049747656813266, "grad_norm": 0.22931872308254242, "learning_rate": 9.967343532587702e-05, "loss": 0.0568, "step": 4230 }, { "epoch": 3.056957462148522, "grad_norm": 0.2836025357246399, "learning_rate": 9.966965108142399e-05, "loss": 0.0588, "step": 4240 }, { "epoch": 3.064167267483778, "grad_norm": 0.19334103167057037, "learning_rate": 9.96658451097781e-05, "loss": 0.0594, "step": 4250 }, { "epoch": 3.071377072819034, "grad_norm": 0.26054295897483826, "learning_rate": 9.966201741260419e-05, "loss": 0.0619, "step": 4260 }, { "epoch": 3.0785868781542898, "grad_norm": 0.23442819714546204, "learning_rate": 9.965816799157665e-05, "loss": 0.0568, "step": 4270 }, { "epoch": 3.0857966834895456, "grad_norm": 0.22592562437057495, "learning_rate": 9.965429684837935e-05, "loss": 0.0542, "step": 4280 }, { "epoch": 3.093006488824802, "grad_norm": 0.2005843073129654, "learning_rate": 9.965040398470562e-05, "loss": 0.0558, "step": 4290 }, { "epoch": 3.1002162941600577, "grad_norm": 0.16573181748390198, "learning_rate": 9.964648940225838e-05, "loss": 0.0509, "step": 4300 }, { "epoch": 3.1074260994953136, "grad_norm": 0.2415606528520584, "learning_rate": 9.964255310274997e-05, "loss": 0.0495, "step": 4310 }, { "epoch": 3.1146359048305694, "grad_norm": 0.20343531668186188, "learning_rate": 9.963859508790228e-05, "loss": 0.0563, "step": 4320 }, { "epoch": 3.1218457101658257, "grad_norm": 0.20190055668354034, "learning_rate": 9.963461535944664e-05, "loss": 0.0552, "step": 4330 }, { "epoch": 3.1290555155010815, "grad_norm": 0.22882796823978424, "learning_rate": 9.963061391912399e-05, "loss": 0.0545, "step": 4340 }, { "epoch": 3.1362653208363374, "grad_norm": 0.19108451902866364, "learning_rate": 9.962659076868463e-05, "loss": 0.0569, "step": 4350 }, { "epoch": 3.143475126171593, "grad_norm": 0.21977797150611877, "learning_rate": 9.962254590988846e-05, "loss": 0.0559, "step": 4360 }, { "epoch": 3.1506849315068495, "grad_norm": 0.21394771337509155, "learning_rate": 9.961847934450481e-05, "loss": 0.0572, "step": 4370 }, { "epoch": 3.1578947368421053, "grad_norm": 0.28301259875297546, "learning_rate": 9.961439107431257e-05, "loss": 0.05, "step": 4380 }, { "epoch": 3.165104542177361, "grad_norm": 0.22588522732257843, "learning_rate": 9.961028110110006e-05, "loss": 0.0615, "step": 4390 }, { "epoch": 3.172314347512617, "grad_norm": 0.18229028582572937, "learning_rate": 9.960614942666513e-05, "loss": 0.0573, "step": 4400 }, { "epoch": 3.1795241528478733, "grad_norm": 0.2134285271167755, "learning_rate": 9.960199605281511e-05, "loss": 0.0627, "step": 4410 }, { "epoch": 3.186733958183129, "grad_norm": 0.24957148730754852, "learning_rate": 9.959782098136683e-05, "loss": 0.0659, "step": 4420 }, { "epoch": 3.193943763518385, "grad_norm": 0.20752695202827454, "learning_rate": 9.959362421414662e-05, "loss": 0.0572, "step": 4430 }, { "epoch": 3.2011535688536408, "grad_norm": 0.2527250051498413, "learning_rate": 9.958940575299027e-05, "loss": 0.0624, "step": 4440 }, { "epoch": 3.208363374188897, "grad_norm": 0.24000585079193115, "learning_rate": 9.95851655997431e-05, "loss": 0.0472, "step": 4450 }, { "epoch": 3.215573179524153, "grad_norm": 0.15970809757709503, "learning_rate": 9.958090375625986e-05, "loss": 0.0529, "step": 4460 }, { "epoch": 3.2227829848594087, "grad_norm": 0.2609306275844574, "learning_rate": 9.957662022440486e-05, "loss": 0.0537, "step": 4470 }, { "epoch": 3.2299927901946646, "grad_norm": 0.19428515434265137, "learning_rate": 9.957231500605187e-05, "loss": 0.053, "step": 4480 }, { "epoch": 3.237202595529921, "grad_norm": 0.21363750100135803, "learning_rate": 9.95679881030841e-05, "loss": 0.058, "step": 4490 }, { "epoch": 3.2444124008651767, "grad_norm": 0.21077154576778412, "learning_rate": 9.95636395173943e-05, "loss": 0.0602, "step": 4500 }, { "epoch": 3.2516222062004325, "grad_norm": 0.1671418696641922, "learning_rate": 9.95592692508847e-05, "loss": 0.0568, "step": 4510 }, { "epoch": 3.2588320115356884, "grad_norm": 0.22459477186203003, "learning_rate": 9.9554877305467e-05, "loss": 0.0562, "step": 4520 }, { "epoch": 3.2660418168709446, "grad_norm": 0.23885273933410645, "learning_rate": 9.955046368306237e-05, "loss": 0.052, "step": 4530 }, { "epoch": 3.2732516222062005, "grad_norm": 0.21578574180603027, "learning_rate": 9.954602838560153e-05, "loss": 0.0523, "step": 4540 }, { "epoch": 3.2804614275414563, "grad_norm": 0.16184887290000916, "learning_rate": 9.954157141502456e-05, "loss": 0.0514, "step": 4550 }, { "epoch": 3.287671232876712, "grad_norm": 0.1807660162448883, "learning_rate": 9.953709277328112e-05, "loss": 0.0511, "step": 4560 }, { "epoch": 3.2948810382119684, "grad_norm": 0.21872998774051666, "learning_rate": 9.953259246233032e-05, "loss": 0.0543, "step": 4570 }, { "epoch": 3.3020908435472243, "grad_norm": 0.1949051022529602, "learning_rate": 9.952807048414077e-05, "loss": 0.0545, "step": 4580 }, { "epoch": 3.30930064888248, "grad_norm": 0.16817277669906616, "learning_rate": 9.95235268406905e-05, "loss": 0.0552, "step": 4590 }, { "epoch": 3.316510454217736, "grad_norm": 0.241011381149292, "learning_rate": 9.951896153396708e-05, "loss": 0.0553, "step": 4600 }, { "epoch": 3.323720259552992, "grad_norm": 0.2521686553955078, "learning_rate": 9.95143745659675e-05, "loss": 0.0641, "step": 4610 }, { "epoch": 3.330930064888248, "grad_norm": 0.23373080790042877, "learning_rate": 9.95097659386983e-05, "loss": 0.0579, "step": 4620 }, { "epoch": 3.338139870223504, "grad_norm": 0.16555771231651306, "learning_rate": 9.950513565417542e-05, "loss": 0.0557, "step": 4630 }, { "epoch": 3.3453496755587597, "grad_norm": 0.2751065194606781, "learning_rate": 9.95004837144243e-05, "loss": 0.0548, "step": 4640 }, { "epoch": 3.352559480894016, "grad_norm": 0.19840656220912933, "learning_rate": 9.949581012147988e-05, "loss": 0.0528, "step": 4650 }, { "epoch": 3.359769286229272, "grad_norm": 0.20215187966823578, "learning_rate": 9.949111487738653e-05, "loss": 0.0684, "step": 4660 }, { "epoch": 3.3669790915645277, "grad_norm": 0.19620227813720703, "learning_rate": 9.948639798419813e-05, "loss": 0.0517, "step": 4670 }, { "epoch": 3.3741888968997835, "grad_norm": 0.14991450309753418, "learning_rate": 9.948165944397799e-05, "loss": 0.0529, "step": 4680 }, { "epoch": 3.38139870223504, "grad_norm": 0.21874834597110748, "learning_rate": 9.94768992587989e-05, "loss": 0.0531, "step": 4690 }, { "epoch": 3.3886085075702956, "grad_norm": 0.19121050834655762, "learning_rate": 9.947211743074313e-05, "loss": 0.0566, "step": 4700 }, { "epoch": 3.3958183129055515, "grad_norm": 0.16081459820270538, "learning_rate": 9.946731396190246e-05, "loss": 0.0514, "step": 4710 }, { "epoch": 3.4030281182408073, "grad_norm": 0.21708935499191284, "learning_rate": 9.946248885437803e-05, "loss": 0.0506, "step": 4720 }, { "epoch": 3.4102379235760636, "grad_norm": 0.186662495136261, "learning_rate": 9.945764211028053e-05, "loss": 0.0504, "step": 4730 }, { "epoch": 3.4174477289113194, "grad_norm": 0.21488630771636963, "learning_rate": 9.94527737317301e-05, "loss": 0.0521, "step": 4740 }, { "epoch": 3.4246575342465753, "grad_norm": 0.15666016936302185, "learning_rate": 9.944788372085631e-05, "loss": 0.0533, "step": 4750 }, { "epoch": 3.431867339581831, "grad_norm": 0.22499637305736542, "learning_rate": 9.944297207979825e-05, "loss": 0.0514, "step": 4760 }, { "epoch": 3.4390771449170874, "grad_norm": 0.18987992405891418, "learning_rate": 9.943803881070441e-05, "loss": 0.0562, "step": 4770 }, { "epoch": 3.446286950252343, "grad_norm": 0.19378235936164856, "learning_rate": 9.943308391573278e-05, "loss": 0.0599, "step": 4780 }, { "epoch": 3.453496755587599, "grad_norm": 0.20094987750053406, "learning_rate": 9.942810739705079e-05, "loss": 0.0475, "step": 4790 }, { "epoch": 3.460706560922855, "grad_norm": 0.20877201855182648, "learning_rate": 9.942310925683538e-05, "loss": 0.0586, "step": 4800 }, { "epoch": 3.467916366258111, "grad_norm": 0.21673685312271118, "learning_rate": 9.941808949727285e-05, "loss": 0.054, "step": 4810 }, { "epoch": 3.475126171593367, "grad_norm": 0.19670914113521576, "learning_rate": 9.941304812055903e-05, "loss": 0.055, "step": 4820 }, { "epoch": 3.482335976928623, "grad_norm": 0.3309347331523895, "learning_rate": 9.940798512889921e-05, "loss": 0.0579, "step": 4830 }, { "epoch": 3.4895457822638787, "grad_norm": 0.18098123371601105, "learning_rate": 9.94029005245081e-05, "loss": 0.0689, "step": 4840 }, { "epoch": 3.496755587599135, "grad_norm": 0.24029551446437836, "learning_rate": 9.939779430960988e-05, "loss": 0.0546, "step": 4850 }, { "epoch": 3.503965392934391, "grad_norm": 0.2556634545326233, "learning_rate": 9.939266648643817e-05, "loss": 0.0558, "step": 4860 }, { "epoch": 3.5111751982696466, "grad_norm": 0.20123928785324097, "learning_rate": 9.938751705723607e-05, "loss": 0.0557, "step": 4870 }, { "epoch": 3.5183850036049025, "grad_norm": 0.26681408286094666, "learning_rate": 9.938234602425613e-05, "loss": 0.0601, "step": 4880 }, { "epoch": 3.5255948089401588, "grad_norm": 0.18695992231369019, "learning_rate": 9.93771533897603e-05, "loss": 0.0509, "step": 4890 }, { "epoch": 3.5328046142754146, "grad_norm": 0.22199514508247375, "learning_rate": 9.937193915602004e-05, "loss": 0.0581, "step": 4900 }, { "epoch": 3.5400144196106704, "grad_norm": 0.16165952384471893, "learning_rate": 9.936670332531621e-05, "loss": 0.0557, "step": 4910 }, { "epoch": 3.5472242249459267, "grad_norm": 0.19019141793251038, "learning_rate": 9.936144589993916e-05, "loss": 0.0509, "step": 4920 }, { "epoch": 3.5544340302811825, "grad_norm": 0.17845208942890167, "learning_rate": 9.935616688218867e-05, "loss": 0.0586, "step": 4930 }, { "epoch": 3.5616438356164384, "grad_norm": 0.2118343710899353, "learning_rate": 9.935086627437395e-05, "loss": 0.0505, "step": 4940 }, { "epoch": 3.568853640951694, "grad_norm": 0.21718868613243103, "learning_rate": 9.934554407881366e-05, "loss": 0.0561, "step": 4950 }, { "epoch": 3.57606344628695, "grad_norm": 0.2672552466392517, "learning_rate": 9.934020029783593e-05, "loss": 0.0523, "step": 4960 }, { "epoch": 3.5832732516222063, "grad_norm": 0.23287363350391388, "learning_rate": 9.933483493377829e-05, "loss": 0.0513, "step": 4970 }, { "epoch": 3.590483056957462, "grad_norm": 0.15232285857200623, "learning_rate": 9.932944798898774e-05, "loss": 0.0633, "step": 4980 }, { "epoch": 3.597692862292718, "grad_norm": 0.14974118769168854, "learning_rate": 9.932403946582072e-05, "loss": 0.0566, "step": 4990 }, { "epoch": 3.6049026676279743, "grad_norm": 0.21217961609363556, "learning_rate": 9.93186093666431e-05, "loss": 0.0588, "step": 5000 }, { "epoch": 3.61211247296323, "grad_norm": 0.22987380623817444, "learning_rate": 9.931315769383018e-05, "loss": 0.0642, "step": 5010 }, { "epoch": 3.619322278298486, "grad_norm": 0.2097453624010086, "learning_rate": 9.930768444976672e-05, "loss": 0.0551, "step": 5020 }, { "epoch": 3.626532083633742, "grad_norm": 0.22014889121055603, "learning_rate": 9.93021896368469e-05, "loss": 0.0509, "step": 5030 }, { "epoch": 3.6337418889689976, "grad_norm": 0.1449861377477646, "learning_rate": 9.929667325747431e-05, "loss": 0.0507, "step": 5040 }, { "epoch": 3.640951694304254, "grad_norm": 0.14882171154022217, "learning_rate": 9.929113531406205e-05, "loss": 0.0538, "step": 5050 }, { "epoch": 3.6481614996395098, "grad_norm": 0.2727383077144623, "learning_rate": 9.928557580903257e-05, "loss": 0.0527, "step": 5060 }, { "epoch": 3.6553713049747656, "grad_norm": 0.18257054686546326, "learning_rate": 9.927999474481779e-05, "loss": 0.0489, "step": 5070 }, { "epoch": 3.662581110310022, "grad_norm": 0.18791833519935608, "learning_rate": 9.927439212385907e-05, "loss": 0.0515, "step": 5080 }, { "epoch": 3.6697909156452777, "grad_norm": 0.1903783231973648, "learning_rate": 9.926876794860718e-05, "loss": 0.0548, "step": 5090 }, { "epoch": 3.6770007209805335, "grad_norm": 0.14642426371574402, "learning_rate": 9.926312222152235e-05, "loss": 0.0542, "step": 5100 }, { "epoch": 3.6842105263157894, "grad_norm": 0.15763439238071442, "learning_rate": 9.925745494507414e-05, "loss": 0.0547, "step": 5110 }, { "epoch": 3.691420331651045, "grad_norm": 0.1827734112739563, "learning_rate": 9.925176612174169e-05, "loss": 0.0616, "step": 5120 }, { "epoch": 3.6986301369863015, "grad_norm": 0.21785780787467957, "learning_rate": 9.924605575401346e-05, "loss": 0.0552, "step": 5130 }, { "epoch": 3.7058399423215573, "grad_norm": 0.2297564297914505, "learning_rate": 9.924032384438733e-05, "loss": 0.0529, "step": 5140 }, { "epoch": 3.713049747656813, "grad_norm": 0.19246385991573334, "learning_rate": 9.923457039537066e-05, "loss": 0.0546, "step": 5150 }, { "epoch": 3.7202595529920695, "grad_norm": 0.15146702527999878, "learning_rate": 9.92287954094802e-05, "loss": 0.0509, "step": 5160 }, { "epoch": 3.7274693583273253, "grad_norm": 0.1953974813222885, "learning_rate": 9.922299888924212e-05, "loss": 0.0494, "step": 5170 }, { "epoch": 3.734679163662581, "grad_norm": 0.213666632771492, "learning_rate": 9.921718083719203e-05, "loss": 0.0587, "step": 5180 }, { "epoch": 3.741888968997837, "grad_norm": 0.18534255027770996, "learning_rate": 9.921134125587491e-05, "loss": 0.0549, "step": 5190 }, { "epoch": 3.749098774333093, "grad_norm": 0.16438624262809753, "learning_rate": 9.920548014784523e-05, "loss": 0.0573, "step": 5200 }, { "epoch": 3.756308579668349, "grad_norm": 0.13740363717079163, "learning_rate": 9.919959751566681e-05, "loss": 0.0527, "step": 5210 }, { "epoch": 3.763518385003605, "grad_norm": 0.1374061405658722, "learning_rate": 9.919369336191291e-05, "loss": 0.0557, "step": 5220 }, { "epoch": 3.7707281903388608, "grad_norm": 0.18367105722427368, "learning_rate": 9.918776768916625e-05, "loss": 0.0541, "step": 5230 }, { "epoch": 3.777937995674117, "grad_norm": 0.20453768968582153, "learning_rate": 9.918182050001888e-05, "loss": 0.051, "step": 5240 }, { "epoch": 3.785147801009373, "grad_norm": 0.1655072271823883, "learning_rate": 9.91758517970723e-05, "loss": 0.0479, "step": 5250 }, { "epoch": 3.7923576063446287, "grad_norm": 0.21248072385787964, "learning_rate": 9.916986158293744e-05, "loss": 0.0589, "step": 5260 }, { "epoch": 3.7995674116798845, "grad_norm": 0.17124280333518982, "learning_rate": 9.916384986023463e-05, "loss": 0.0602, "step": 5270 }, { "epoch": 3.8067772170151404, "grad_norm": 0.2145577222108841, "learning_rate": 9.915781663159359e-05, "loss": 0.0595, "step": 5280 }, { "epoch": 3.8139870223503967, "grad_norm": 0.1712173968553543, "learning_rate": 9.915176189965346e-05, "loss": 0.0571, "step": 5290 }, { "epoch": 3.8211968276856525, "grad_norm": 0.17045652866363525, "learning_rate": 9.914568566706279e-05, "loss": 0.0484, "step": 5300 }, { "epoch": 3.8284066330209083, "grad_norm": 0.14389769732952118, "learning_rate": 9.913958793647953e-05, "loss": 0.0556, "step": 5310 }, { "epoch": 3.8356164383561646, "grad_norm": 0.17493285238742828, "learning_rate": 9.913346871057102e-05, "loss": 0.0514, "step": 5320 }, { "epoch": 3.8428262436914205, "grad_norm": 0.19466960430145264, "learning_rate": 9.912732799201403e-05, "loss": 0.0503, "step": 5330 }, { "epoch": 3.8500360490266763, "grad_norm": 0.17109878361225128, "learning_rate": 9.912116578349474e-05, "loss": 0.0522, "step": 5340 }, { "epoch": 3.857245854361932, "grad_norm": 0.15233775973320007, "learning_rate": 9.911498208770867e-05, "loss": 0.0522, "step": 5350 }, { "epoch": 3.864455659697188, "grad_norm": 0.17916353046894073, "learning_rate": 9.910877690736078e-05, "loss": 0.0534, "step": 5360 }, { "epoch": 3.8716654650324442, "grad_norm": 0.12937480211257935, "learning_rate": 9.910255024516546e-05, "loss": 0.064, "step": 5370 }, { "epoch": 3.8788752703677, "grad_norm": 0.24444076418876648, "learning_rate": 9.909630210384644e-05, "loss": 0.0555, "step": 5380 }, { "epoch": 3.886085075702956, "grad_norm": 0.21937844157218933, "learning_rate": 9.909003248613688e-05, "loss": 0.0514, "step": 5390 }, { "epoch": 3.893294881038212, "grad_norm": 0.26020434498786926, "learning_rate": 9.908374139477934e-05, "loss": 0.0594, "step": 5400 }, { "epoch": 3.900504686373468, "grad_norm": 0.22179317474365234, "learning_rate": 9.907742883252571e-05, "loss": 0.0536, "step": 5410 }, { "epoch": 3.907714491708724, "grad_norm": 0.25188106298446655, "learning_rate": 9.907109480213736e-05, "loss": 0.0533, "step": 5420 }, { "epoch": 3.9149242970439797, "grad_norm": 0.20935001969337463, "learning_rate": 9.906473930638498e-05, "loss": 0.0477, "step": 5430 }, { "epoch": 3.9221341023792355, "grad_norm": 0.2217789590358734, "learning_rate": 9.90583623480487e-05, "loss": 0.0533, "step": 5440 }, { "epoch": 3.929343907714492, "grad_norm": 0.1423070728778839, "learning_rate": 9.905196392991802e-05, "loss": 0.0605, "step": 5450 }, { "epoch": 3.9365537130497477, "grad_norm": 0.1998324692249298, "learning_rate": 9.904554405479183e-05, "loss": 0.0469, "step": 5460 }, { "epoch": 3.9437635183850035, "grad_norm": 0.19943168759346008, "learning_rate": 9.903910272547838e-05, "loss": 0.0509, "step": 5470 }, { "epoch": 3.9509733237202598, "grad_norm": 0.1654740571975708, "learning_rate": 9.903263994479533e-05, "loss": 0.051, "step": 5480 }, { "epoch": 3.9581831290555156, "grad_norm": 0.1834978461265564, "learning_rate": 9.902615571556972e-05, "loss": 0.0518, "step": 5490 }, { "epoch": 3.9653929343907715, "grad_norm": 0.17146818339824677, "learning_rate": 9.9019650040638e-05, "loss": 0.0469, "step": 5500 }, { "epoch": 3.9726027397260273, "grad_norm": 0.1669846773147583, "learning_rate": 9.901312292284594e-05, "loss": 0.0511, "step": 5510 }, { "epoch": 3.979812545061283, "grad_norm": 0.1441197544336319, "learning_rate": 9.900657436504875e-05, "loss": 0.0497, "step": 5520 }, { "epoch": 3.9870223503965394, "grad_norm": 0.19736772775650024, "learning_rate": 9.900000437011097e-05, "loss": 0.0614, "step": 5530 }, { "epoch": 3.9942321557317952, "grad_norm": 0.15762782096862793, "learning_rate": 9.899341294090653e-05, "loss": 0.0515, "step": 5540 }, { "epoch": 4.0014419610670515, "grad_norm": 0.18993471562862396, "learning_rate": 9.898680008031877e-05, "loss": 0.0496, "step": 5550 }, { "epoch": 4.008651766402307, "grad_norm": 0.19017747044563293, "learning_rate": 9.898016579124037e-05, "loss": 0.0508, "step": 5560 }, { "epoch": 4.015861571737563, "grad_norm": 0.17568841576576233, "learning_rate": 9.89735100765734e-05, "loss": 0.0495, "step": 5570 }, { "epoch": 4.023071377072819, "grad_norm": 0.18694503605365753, "learning_rate": 9.89668329392293e-05, "loss": 0.0486, "step": 5580 }, { "epoch": 4.030281182408075, "grad_norm": 0.1754835993051529, "learning_rate": 9.896013438212885e-05, "loss": 0.0547, "step": 5590 }, { "epoch": 4.037490987743331, "grad_norm": 0.13857391476631165, "learning_rate": 9.895341440820225e-05, "loss": 0.0539, "step": 5600 }, { "epoch": 4.0447007930785865, "grad_norm": 0.22515340149402618, "learning_rate": 9.894667302038902e-05, "loss": 0.0592, "step": 5610 }, { "epoch": 4.051910598413842, "grad_norm": 0.1511872112751007, "learning_rate": 9.893991022163812e-05, "loss": 0.0514, "step": 5620 }, { "epoch": 4.059120403749099, "grad_norm": 0.21927641332149506, "learning_rate": 9.893312601490779e-05, "loss": 0.0504, "step": 5630 }, { "epoch": 4.066330209084355, "grad_norm": 0.19293980300426483, "learning_rate": 9.892632040316568e-05, "loss": 0.0524, "step": 5640 }, { "epoch": 4.073540014419611, "grad_norm": 0.2175953984260559, "learning_rate": 9.891949338938877e-05, "loss": 0.0489, "step": 5650 }, { "epoch": 4.080749819754867, "grad_norm": 0.15067683160305023, "learning_rate": 9.89126449765635e-05, "loss": 0.0489, "step": 5660 }, { "epoch": 4.0879596250901225, "grad_norm": 0.1966468095779419, "learning_rate": 9.890577516768551e-05, "loss": 0.0503, "step": 5670 }, { "epoch": 4.095169430425378, "grad_norm": 0.24676842987537384, "learning_rate": 9.889888396575995e-05, "loss": 0.0532, "step": 5680 }, { "epoch": 4.102379235760634, "grad_norm": 0.18678903579711914, "learning_rate": 9.889197137380125e-05, "loss": 0.0528, "step": 5690 }, { "epoch": 4.109589041095891, "grad_norm": 0.19760651886463165, "learning_rate": 9.888503739483321e-05, "loss": 0.0516, "step": 5700 }, { "epoch": 4.116798846431147, "grad_norm": 0.19678059220314026, "learning_rate": 9.887808203188897e-05, "loss": 0.0484, "step": 5710 }, { "epoch": 4.1240086517664025, "grad_norm": 0.203739732503891, "learning_rate": 9.887110528801108e-05, "loss": 0.0519, "step": 5720 }, { "epoch": 4.131218457101658, "grad_norm": 0.194625124335289, "learning_rate": 9.886410716625137e-05, "loss": 0.0576, "step": 5730 }, { "epoch": 4.138428262436914, "grad_norm": 0.18640011548995972, "learning_rate": 9.885708766967108e-05, "loss": 0.0487, "step": 5740 }, { "epoch": 4.14563806777217, "grad_norm": 0.1766534000635147, "learning_rate": 9.885004680134076e-05, "loss": 0.0487, "step": 5750 }, { "epoch": 4.152847873107426, "grad_norm": 0.22839152812957764, "learning_rate": 9.884298456434033e-05, "loss": 0.047, "step": 5760 }, { "epoch": 4.160057678442682, "grad_norm": 0.1764470487833023, "learning_rate": 9.883590096175905e-05, "loss": 0.054, "step": 5770 }, { "epoch": 4.1672674837779375, "grad_norm": 0.1855277270078659, "learning_rate": 9.882879599669554e-05, "loss": 0.0556, "step": 5780 }, { "epoch": 4.174477289113194, "grad_norm": 0.15342514216899872, "learning_rate": 9.882166967225775e-05, "loss": 0.0523, "step": 5790 }, { "epoch": 4.18168709444845, "grad_norm": 0.22866111993789673, "learning_rate": 9.881452199156296e-05, "loss": 0.047, "step": 5800 }, { "epoch": 4.188896899783706, "grad_norm": 0.23083847761154175, "learning_rate": 9.880735295773782e-05, "loss": 0.0479, "step": 5810 }, { "epoch": 4.196106705118962, "grad_norm": 0.1815750002861023, "learning_rate": 9.88001625739183e-05, "loss": 0.0416, "step": 5820 }, { "epoch": 4.203316510454218, "grad_norm": 0.1739431470632553, "learning_rate": 9.879295084324976e-05, "loss": 0.0581, "step": 5830 }, { "epoch": 4.2105263157894735, "grad_norm": 0.19770634174346924, "learning_rate": 9.87857177688868e-05, "loss": 0.05, "step": 5840 }, { "epoch": 4.217736121124729, "grad_norm": 0.2005169689655304, "learning_rate": 9.877846335399343e-05, "loss": 0.0573, "step": 5850 }, { "epoch": 4.224945926459986, "grad_norm": 0.17079173028469086, "learning_rate": 9.8771187601743e-05, "loss": 0.0544, "step": 5860 }, { "epoch": 4.232155731795242, "grad_norm": 0.11856459081172943, "learning_rate": 9.876389051531813e-05, "loss": 0.0454, "step": 5870 }, { "epoch": 4.239365537130498, "grad_norm": 0.19257637858390808, "learning_rate": 9.875657209791088e-05, "loss": 0.0532, "step": 5880 }, { "epoch": 4.2465753424657535, "grad_norm": 0.17427320778369904, "learning_rate": 9.874923235272248e-05, "loss": 0.057, "step": 5890 }, { "epoch": 4.253785147801009, "grad_norm": 0.22381578385829926, "learning_rate": 9.874187128296369e-05, "loss": 0.0474, "step": 5900 }, { "epoch": 4.260994953136265, "grad_norm": 0.1586090326309204, "learning_rate": 9.873448889185439e-05, "loss": 0.0505, "step": 5910 }, { "epoch": 4.268204758471521, "grad_norm": 0.1995134949684143, "learning_rate": 9.872708518262397e-05, "loss": 0.0586, "step": 5920 }, { "epoch": 4.275414563806777, "grad_norm": 0.1718788743019104, "learning_rate": 9.871966015851101e-05, "loss": 0.0531, "step": 5930 }, { "epoch": 4.282624369142033, "grad_norm": 0.20282918214797974, "learning_rate": 9.87122138227635e-05, "loss": 0.0473, "step": 5940 }, { "epoch": 4.289834174477289, "grad_norm": 0.1715880036354065, "learning_rate": 9.87047461786387e-05, "loss": 0.0522, "step": 5950 }, { "epoch": 4.297043979812545, "grad_norm": 0.17270547151565552, "learning_rate": 9.869725722940323e-05, "loss": 0.0456, "step": 5960 }, { "epoch": 4.304253785147801, "grad_norm": 0.16824068129062653, "learning_rate": 9.868974697833299e-05, "loss": 0.0471, "step": 5970 }, { "epoch": 4.311463590483057, "grad_norm": 0.20704159140586853, "learning_rate": 9.868221542871324e-05, "loss": 0.0467, "step": 5980 }, { "epoch": 4.318673395818313, "grad_norm": 0.13805091381072998, "learning_rate": 9.867466258383853e-05, "loss": 0.0501, "step": 5990 }, { "epoch": 4.325883201153569, "grad_norm": 0.14495350420475006, "learning_rate": 9.866708844701272e-05, "loss": 0.0562, "step": 6000 }, { "epoch": 4.3330930064888245, "grad_norm": 0.12607651948928833, "learning_rate": 9.865949302154899e-05, "loss": 0.0463, "step": 6010 }, { "epoch": 4.340302811824081, "grad_norm": 0.160060852766037, "learning_rate": 9.865187631076987e-05, "loss": 0.0491, "step": 6020 }, { "epoch": 4.347512617159337, "grad_norm": 0.15292657911777496, "learning_rate": 9.864423831800712e-05, "loss": 0.0491, "step": 6030 }, { "epoch": 4.354722422494593, "grad_norm": 0.17620819807052612, "learning_rate": 9.863657904660191e-05, "loss": 0.0493, "step": 6040 }, { "epoch": 4.361932227829849, "grad_norm": 0.1870705485343933, "learning_rate": 9.862889849990462e-05, "loss": 0.0525, "step": 6050 }, { "epoch": 4.3691420331651045, "grad_norm": 0.21059972047805786, "learning_rate": 9.8621196681275e-05, "loss": 0.0505, "step": 6060 }, { "epoch": 4.37635183850036, "grad_norm": 0.16355542838573456, "learning_rate": 9.861347359408211e-05, "loss": 0.0432, "step": 6070 }, { "epoch": 4.383561643835616, "grad_norm": 0.20352202653884888, "learning_rate": 9.860572924170426e-05, "loss": 0.0531, "step": 6080 }, { "epoch": 4.390771449170872, "grad_norm": 0.18314887583255768, "learning_rate": 9.85979636275291e-05, "loss": 0.0495, "step": 6090 }, { "epoch": 4.397981254506128, "grad_norm": 0.12753765285015106, "learning_rate": 9.859017675495357e-05, "loss": 0.0412, "step": 6100 }, { "epoch": 4.405191059841385, "grad_norm": 0.15301412343978882, "learning_rate": 9.858236862738392e-05, "loss": 0.0529, "step": 6110 }, { "epoch": 4.41240086517664, "grad_norm": 0.24053321778774261, "learning_rate": 9.85745392482357e-05, "loss": 0.0488, "step": 6120 }, { "epoch": 4.419610670511896, "grad_norm": 0.17551131546497345, "learning_rate": 9.856668862093372e-05, "loss": 0.0498, "step": 6130 }, { "epoch": 4.426820475847152, "grad_norm": 0.1834782510995865, "learning_rate": 9.855881674891214e-05, "loss": 0.0492, "step": 6140 }, { "epoch": 4.434030281182408, "grad_norm": 0.17750558257102966, "learning_rate": 9.855092363561437e-05, "loss": 0.0446, "step": 6150 }, { "epoch": 4.441240086517664, "grad_norm": 0.17956428229808807, "learning_rate": 9.854300928449312e-05, "loss": 0.0493, "step": 6160 }, { "epoch": 4.44844989185292, "grad_norm": 0.14611999690532684, "learning_rate": 9.85350736990104e-05, "loss": 0.0481, "step": 6170 }, { "epoch": 4.455659697188176, "grad_norm": 0.1530769169330597, "learning_rate": 9.852711688263752e-05, "loss": 0.0546, "step": 6180 }, { "epoch": 4.462869502523432, "grad_norm": 0.18099242448806763, "learning_rate": 9.851913883885503e-05, "loss": 0.0533, "step": 6190 }, { "epoch": 4.470079307858688, "grad_norm": 0.1842096894979477, "learning_rate": 9.851113957115283e-05, "loss": 0.0581, "step": 6200 }, { "epoch": 4.477289113193944, "grad_norm": 0.17848356068134308, "learning_rate": 9.850311908303005e-05, "loss": 0.0474, "step": 6210 }, { "epoch": 4.4844989185292, "grad_norm": 0.20240773260593414, "learning_rate": 9.849507737799514e-05, "loss": 0.0504, "step": 6220 }, { "epoch": 4.4917087238644555, "grad_norm": 0.2137025147676468, "learning_rate": 9.84870144595658e-05, "loss": 0.0535, "step": 6230 }, { "epoch": 4.498918529199711, "grad_norm": 0.1759490817785263, "learning_rate": 9.847893033126903e-05, "loss": 0.0546, "step": 6240 }, { "epoch": 4.506128334534967, "grad_norm": 0.1859772503376007, "learning_rate": 9.847082499664111e-05, "loss": 0.0503, "step": 6250 }, { "epoch": 4.513338139870223, "grad_norm": 0.21446777880191803, "learning_rate": 9.846269845922758e-05, "loss": 0.0514, "step": 6260 }, { "epoch": 4.52054794520548, "grad_norm": 0.15398208796977997, "learning_rate": 9.845455072258326e-05, "loss": 0.0479, "step": 6270 }, { "epoch": 4.527757750540736, "grad_norm": 0.15850810706615448, "learning_rate": 9.844638179027226e-05, "loss": 0.0529, "step": 6280 }, { "epoch": 4.534967555875991, "grad_norm": 0.22123536467552185, "learning_rate": 9.843819166586795e-05, "loss": 0.0561, "step": 6290 }, { "epoch": 4.542177361211247, "grad_norm": 0.1643032282590866, "learning_rate": 9.842998035295294e-05, "loss": 0.0507, "step": 6300 }, { "epoch": 4.549387166546503, "grad_norm": 0.1363152116537094, "learning_rate": 9.842174785511918e-05, "loss": 0.0486, "step": 6310 }, { "epoch": 4.556596971881759, "grad_norm": 0.169725239276886, "learning_rate": 9.841349417596779e-05, "loss": 0.047, "step": 6320 }, { "epoch": 4.563806777217015, "grad_norm": 0.2301744967699051, "learning_rate": 9.840521931910926e-05, "loss": 0.0501, "step": 6330 }, { "epoch": 4.5710165825522715, "grad_norm": 0.19459028542041779, "learning_rate": 9.839692328816327e-05, "loss": 0.0532, "step": 6340 }, { "epoch": 4.578226387887527, "grad_norm": 0.17016011476516724, "learning_rate": 9.838860608675879e-05, "loss": 0.0432, "step": 6350 }, { "epoch": 4.585436193222783, "grad_norm": 0.20355431735515594, "learning_rate": 9.838026771853406e-05, "loss": 0.0508, "step": 6360 }, { "epoch": 4.592645998558039, "grad_norm": 0.19157961010932922, "learning_rate": 9.837190818713655e-05, "loss": 0.048, "step": 6370 }, { "epoch": 4.599855803893295, "grad_norm": 0.17440928518772125, "learning_rate": 9.836352749622299e-05, "loss": 0.0456, "step": 6380 }, { "epoch": 4.607065609228551, "grad_norm": 0.1964336484670639, "learning_rate": 9.835512564945941e-05, "loss": 0.0501, "step": 6390 }, { "epoch": 4.6142754145638065, "grad_norm": 0.1655006855726242, "learning_rate": 9.834670265052104e-05, "loss": 0.0526, "step": 6400 }, { "epoch": 4.621485219899062, "grad_norm": 0.18403968214988708, "learning_rate": 9.833825850309239e-05, "loss": 0.0479, "step": 6410 }, { "epoch": 4.628695025234318, "grad_norm": 0.20894576609134674, "learning_rate": 9.832979321086723e-05, "loss": 0.0523, "step": 6420 }, { "epoch": 4.635904830569575, "grad_norm": 0.16066491603851318, "learning_rate": 9.832130677754854e-05, "loss": 0.0491, "step": 6430 }, { "epoch": 4.643114635904831, "grad_norm": 0.16334868967533112, "learning_rate": 9.831279920684861e-05, "loss": 0.0479, "step": 6440 }, { "epoch": 4.650324441240087, "grad_norm": 0.20719417929649353, "learning_rate": 9.830427050248891e-05, "loss": 0.0467, "step": 6450 }, { "epoch": 4.657534246575342, "grad_norm": 0.17065921425819397, "learning_rate": 9.829572066820021e-05, "loss": 0.0459, "step": 6460 }, { "epoch": 4.664744051910598, "grad_norm": 0.14903612434864044, "learning_rate": 9.828714970772247e-05, "loss": 0.0522, "step": 6470 }, { "epoch": 4.671953857245854, "grad_norm": 0.17349772155284882, "learning_rate": 9.827855762480493e-05, "loss": 0.0472, "step": 6480 }, { "epoch": 4.67916366258111, "grad_norm": 0.16772998869419098, "learning_rate": 9.826994442320608e-05, "loss": 0.05, "step": 6490 }, { "epoch": 4.686373467916367, "grad_norm": 0.20262961089611053, "learning_rate": 9.82613101066936e-05, "loss": 0.0511, "step": 6500 }, { "epoch": 4.6935832732516225, "grad_norm": 0.2621898055076599, "learning_rate": 9.825265467904445e-05, "loss": 0.0512, "step": 6510 }, { "epoch": 4.700793078586878, "grad_norm": 0.14481782913208008, "learning_rate": 9.82439781440448e-05, "loss": 0.0519, "step": 6520 }, { "epoch": 4.708002883922134, "grad_norm": 0.17054978013038635, "learning_rate": 9.823528050549006e-05, "loss": 0.0587, "step": 6530 }, { "epoch": 4.71521268925739, "grad_norm": 0.18999618291854858, "learning_rate": 9.822656176718487e-05, "loss": 0.0491, "step": 6540 }, { "epoch": 4.722422494592646, "grad_norm": 0.18846949934959412, "learning_rate": 9.82178219329431e-05, "loss": 0.052, "step": 6550 }, { "epoch": 4.729632299927902, "grad_norm": 0.20467931032180786, "learning_rate": 9.820906100658789e-05, "loss": 0.0557, "step": 6560 }, { "epoch": 4.7368421052631575, "grad_norm": 0.14183317124843597, "learning_rate": 9.82002789919515e-05, "loss": 0.0483, "step": 6570 }, { "epoch": 4.744051910598413, "grad_norm": 0.22055140137672424, "learning_rate": 9.819147589287554e-05, "loss": 0.0601, "step": 6580 }, { "epoch": 4.75126171593367, "grad_norm": 0.23363535106182098, "learning_rate": 9.818265171321074e-05, "loss": 0.0523, "step": 6590 }, { "epoch": 4.758471521268926, "grad_norm": 0.16488324105739594, "learning_rate": 9.817380645681714e-05, "loss": 0.0476, "step": 6600 }, { "epoch": 4.765681326604182, "grad_norm": 0.18884852528572083, "learning_rate": 9.816494012756392e-05, "loss": 0.0493, "step": 6610 }, { "epoch": 4.772891131939438, "grad_norm": 0.21405726671218872, "learning_rate": 9.815605272932953e-05, "loss": 0.0503, "step": 6620 }, { "epoch": 4.780100937274693, "grad_norm": 0.1462121605873108, "learning_rate": 9.814714426600162e-05, "loss": 0.0514, "step": 6630 }, { "epoch": 4.787310742609949, "grad_norm": 0.15407629311084747, "learning_rate": 9.813821474147704e-05, "loss": 0.0481, "step": 6640 }, { "epoch": 4.794520547945205, "grad_norm": 0.1392265111207962, "learning_rate": 9.812926415966189e-05, "loss": 0.0462, "step": 6650 }, { "epoch": 4.801730353280462, "grad_norm": 0.15766873955726624, "learning_rate": 9.812029252447143e-05, "loss": 0.0473, "step": 6660 }, { "epoch": 4.808940158615718, "grad_norm": 0.19978514313697815, "learning_rate": 9.811129983983018e-05, "loss": 0.0445, "step": 6670 }, { "epoch": 4.8161499639509735, "grad_norm": 0.2496848702430725, "learning_rate": 9.810228610967185e-05, "loss": 0.051, "step": 6680 }, { "epoch": 4.823359769286229, "grad_norm": 0.2143443077802658, "learning_rate": 9.809325133793934e-05, "loss": 0.0472, "step": 6690 }, { "epoch": 4.830569574621485, "grad_norm": 0.12277713418006897, "learning_rate": 9.808419552858477e-05, "loss": 0.0527, "step": 6700 }, { "epoch": 4.837779379956741, "grad_norm": 0.17387908697128296, "learning_rate": 9.807511868556944e-05, "loss": 0.0439, "step": 6710 }, { "epoch": 4.844989185291997, "grad_norm": 0.16376706957817078, "learning_rate": 9.806602081286392e-05, "loss": 0.0508, "step": 6720 }, { "epoch": 4.852198990627253, "grad_norm": 0.1614656001329422, "learning_rate": 9.805690191444785e-05, "loss": 0.0473, "step": 6730 }, { "epoch": 4.8594087959625085, "grad_norm": 0.17089170217514038, "learning_rate": 9.804776199431022e-05, "loss": 0.052, "step": 6740 }, { "epoch": 4.866618601297765, "grad_norm": 0.18324384093284607, "learning_rate": 9.80386010564491e-05, "loss": 0.0477, "step": 6750 }, { "epoch": 4.873828406633021, "grad_norm": 0.21448150277137756, "learning_rate": 9.802941910487183e-05, "loss": 0.0446, "step": 6760 }, { "epoch": 4.881038211968277, "grad_norm": 0.13410672545433044, "learning_rate": 9.802021614359487e-05, "loss": 0.0458, "step": 6770 }, { "epoch": 4.888248017303533, "grad_norm": 0.15462645888328552, "learning_rate": 9.801099217664394e-05, "loss": 0.0441, "step": 6780 }, { "epoch": 4.895457822638789, "grad_norm": 0.17705556750297546, "learning_rate": 9.800174720805388e-05, "loss": 0.0512, "step": 6790 }, { "epoch": 4.902667627974044, "grad_norm": 0.14443400502204895, "learning_rate": 9.799248124186878e-05, "loss": 0.0522, "step": 6800 }, { "epoch": 4.9098774333093, "grad_norm": 0.17187006771564484, "learning_rate": 9.798319428214187e-05, "loss": 0.0491, "step": 6810 }, { "epoch": 4.917087238644557, "grad_norm": 0.2167801856994629, "learning_rate": 9.79738863329356e-05, "loss": 0.0476, "step": 6820 }, { "epoch": 4.924297043979813, "grad_norm": 0.1890256106853485, "learning_rate": 9.79645573983216e-05, "loss": 0.0439, "step": 6830 }, { "epoch": 4.931506849315069, "grad_norm": 0.180909663438797, "learning_rate": 9.795520748238062e-05, "loss": 0.0492, "step": 6840 }, { "epoch": 4.9387166546503245, "grad_norm": 0.21027818322181702, "learning_rate": 9.794583658920264e-05, "loss": 0.0445, "step": 6850 }, { "epoch": 4.94592645998558, "grad_norm": 0.15283864736557007, "learning_rate": 9.793644472288683e-05, "loss": 0.0481, "step": 6860 }, { "epoch": 4.953136265320836, "grad_norm": 0.13477303087711334, "learning_rate": 9.79270318875415e-05, "loss": 0.0575, "step": 6870 }, { "epoch": 4.960346070656092, "grad_norm": 0.18521997332572937, "learning_rate": 9.791759808728416e-05, "loss": 0.0518, "step": 6880 }, { "epoch": 4.967555875991348, "grad_norm": 0.20288215577602386, "learning_rate": 9.790814332624143e-05, "loss": 0.0523, "step": 6890 }, { "epoch": 4.974765681326604, "grad_norm": 0.15939636528491974, "learning_rate": 9.789866760854919e-05, "loss": 0.0449, "step": 6900 }, { "epoch": 4.98197548666186, "grad_norm": 0.21130365133285522, "learning_rate": 9.788917093835242e-05, "loss": 0.0536, "step": 6910 }, { "epoch": 4.989185291997116, "grad_norm": 0.22387129068374634, "learning_rate": 9.787965331980528e-05, "loss": 0.0576, "step": 6920 }, { "epoch": 4.996395097332372, "grad_norm": 0.1814258098602295, "learning_rate": 9.787011475707111e-05, "loss": 0.0538, "step": 6930 }, { "epoch": 5.003604902667628, "grad_norm": 0.17256957292556763, "learning_rate": 9.786055525432241e-05, "loss": 0.0504, "step": 6940 }, { "epoch": 5.010814708002884, "grad_norm": 0.1694738268852234, "learning_rate": 9.78509748157408e-05, "loss": 0.0522, "step": 6950 }, { "epoch": 5.01802451333814, "grad_norm": 0.1944543421268463, "learning_rate": 9.784137344551713e-05, "loss": 0.0523, "step": 6960 }, { "epoch": 5.025234318673395, "grad_norm": 0.20387686789035797, "learning_rate": 9.783175114785134e-05, "loss": 0.0428, "step": 6970 }, { "epoch": 5.032444124008652, "grad_norm": 0.18022307753562927, "learning_rate": 9.782210792695254e-05, "loss": 0.0538, "step": 6980 }, { "epoch": 5.039653929343908, "grad_norm": 0.19216349720954895, "learning_rate": 9.781244378703904e-05, "loss": 0.0467, "step": 6990 }, { "epoch": 5.046863734679164, "grad_norm": 0.14135049283504486, "learning_rate": 9.780275873233824e-05, "loss": 0.0462, "step": 7000 }, { "epoch": 5.05407354001442, "grad_norm": 0.20069660246372223, "learning_rate": 9.77930527670867e-05, "loss": 0.0502, "step": 7010 }, { "epoch": 5.0612833453496755, "grad_norm": 0.1934470534324646, "learning_rate": 9.778332589553016e-05, "loss": 0.0435, "step": 7020 }, { "epoch": 5.068493150684931, "grad_norm": 0.1719396710395813, "learning_rate": 9.777357812192349e-05, "loss": 0.0446, "step": 7030 }, { "epoch": 5.075702956020187, "grad_norm": 0.2212192267179489, "learning_rate": 9.776380945053066e-05, "loss": 0.0508, "step": 7040 }, { "epoch": 5.082912761355443, "grad_norm": 0.14546290040016174, "learning_rate": 9.775401988562486e-05, "loss": 0.058, "step": 7050 }, { "epoch": 5.0901225666907, "grad_norm": 0.12389754503965378, "learning_rate": 9.774420943148835e-05, "loss": 0.0456, "step": 7060 }, { "epoch": 5.097332372025956, "grad_norm": 0.1755615919828415, "learning_rate": 9.773437809241257e-05, "loss": 0.0519, "step": 7070 }, { "epoch": 5.104542177361211, "grad_norm": 0.12292484939098358, "learning_rate": 9.772452587269808e-05, "loss": 0.0409, "step": 7080 }, { "epoch": 5.111751982696467, "grad_norm": 0.1524939239025116, "learning_rate": 9.771465277665457e-05, "loss": 0.0462, "step": 7090 }, { "epoch": 5.118961788031723, "grad_norm": 0.19848859310150146, "learning_rate": 9.770475880860089e-05, "loss": 0.0465, "step": 7100 }, { "epoch": 5.126171593366979, "grad_norm": 0.16713795065879822, "learning_rate": 9.769484397286495e-05, "loss": 0.0502, "step": 7110 }, { "epoch": 5.133381398702235, "grad_norm": 0.13317836821079254, "learning_rate": 9.768490827378388e-05, "loss": 0.0446, "step": 7120 }, { "epoch": 5.140591204037491, "grad_norm": 0.18160848319530487, "learning_rate": 9.767495171570387e-05, "loss": 0.042, "step": 7130 }, { "epoch": 5.147801009372747, "grad_norm": 0.12028345465660095, "learning_rate": 9.766497430298027e-05, "loss": 0.046, "step": 7140 }, { "epoch": 5.155010814708003, "grad_norm": 0.17041347920894623, "learning_rate": 9.765497603997754e-05, "loss": 0.0583, "step": 7150 }, { "epoch": 5.162220620043259, "grad_norm": 0.18683646619319916, "learning_rate": 9.764495693106926e-05, "loss": 0.0516, "step": 7160 }, { "epoch": 5.169430425378515, "grad_norm": 0.17492178082466125, "learning_rate": 9.76349169806381e-05, "loss": 0.0487, "step": 7170 }, { "epoch": 5.176640230713771, "grad_norm": 0.18492963910102844, "learning_rate": 9.762485619307591e-05, "loss": 0.0545, "step": 7180 }, { "epoch": 5.1838500360490265, "grad_norm": 0.1211942583322525, "learning_rate": 9.76147745727836e-05, "loss": 0.0509, "step": 7190 }, { "epoch": 5.191059841384282, "grad_norm": 0.22250448167324066, "learning_rate": 9.760467212417124e-05, "loss": 0.047, "step": 7200 }, { "epoch": 5.198269646719538, "grad_norm": 0.19178585708141327, "learning_rate": 9.759454885165796e-05, "loss": 0.0422, "step": 7210 }, { "epoch": 5.205479452054795, "grad_norm": 0.21112512052059174, "learning_rate": 9.7584404759672e-05, "loss": 0.0499, "step": 7220 }, { "epoch": 5.212689257390051, "grad_norm": 0.2010343074798584, "learning_rate": 9.75742398526508e-05, "loss": 0.0496, "step": 7230 }, { "epoch": 5.219899062725307, "grad_norm": 0.16496552526950836, "learning_rate": 9.756405413504077e-05, "loss": 0.0481, "step": 7240 }, { "epoch": 5.227108868060562, "grad_norm": 0.1871640831232071, "learning_rate": 9.755384761129752e-05, "loss": 0.0531, "step": 7250 }, { "epoch": 5.234318673395818, "grad_norm": 0.1707841157913208, "learning_rate": 9.754362028588572e-05, "loss": 0.0451, "step": 7260 }, { "epoch": 5.241528478731074, "grad_norm": 0.16406908631324768, "learning_rate": 9.753337216327917e-05, "loss": 0.0506, "step": 7270 }, { "epoch": 5.24873828406633, "grad_norm": 0.1550709754228592, "learning_rate": 9.752310324796073e-05, "loss": 0.0486, "step": 7280 }, { "epoch": 5.255948089401586, "grad_norm": 0.17359375953674316, "learning_rate": 9.751281354442235e-05, "loss": 0.0466, "step": 7290 }, { "epoch": 5.2631578947368425, "grad_norm": 0.17167064547538757, "learning_rate": 9.750250305716513e-05, "loss": 0.0503, "step": 7300 }, { "epoch": 5.270367700072098, "grad_norm": 0.16905170679092407, "learning_rate": 9.749217179069923e-05, "loss": 0.0523, "step": 7310 }, { "epoch": 5.277577505407354, "grad_norm": 0.1909712255001068, "learning_rate": 9.748181974954386e-05, "loss": 0.0453, "step": 7320 }, { "epoch": 5.28478731074261, "grad_norm": 0.16202345490455627, "learning_rate": 9.74714469382274e-05, "loss": 0.0449, "step": 7330 }, { "epoch": 5.291997116077866, "grad_norm": 0.17505240440368652, "learning_rate": 9.746105336128724e-05, "loss": 0.0446, "step": 7340 }, { "epoch": 5.299206921413122, "grad_norm": 0.17237621545791626, "learning_rate": 9.745063902326988e-05, "loss": 0.046, "step": 7350 }, { "epoch": 5.3064167267483775, "grad_norm": 0.13983570039272308, "learning_rate": 9.744020392873093e-05, "loss": 0.0436, "step": 7360 }, { "epoch": 5.313626532083633, "grad_norm": 0.1920827329158783, "learning_rate": 9.742974808223504e-05, "loss": 0.0452, "step": 7370 }, { "epoch": 5.32083633741889, "grad_norm": 0.15241971611976624, "learning_rate": 9.741927148835596e-05, "loss": 0.0507, "step": 7380 }, { "epoch": 5.328046142754146, "grad_norm": 0.21636225283145905, "learning_rate": 9.740877415167648e-05, "loss": 0.0483, "step": 7390 }, { "epoch": 5.335255948089402, "grad_norm": 0.1500992327928543, "learning_rate": 9.739825607678854e-05, "loss": 0.0506, "step": 7400 }, { "epoch": 5.342465753424658, "grad_norm": 0.16107912361621857, "learning_rate": 9.738771726829308e-05, "loss": 0.0424, "step": 7410 }, { "epoch": 5.349675558759913, "grad_norm": 0.15940718352794647, "learning_rate": 9.737715773080011e-05, "loss": 0.0442, "step": 7420 }, { "epoch": 5.356885364095169, "grad_norm": 0.11791400611400604, "learning_rate": 9.736657746892877e-05, "loss": 0.046, "step": 7430 }, { "epoch": 5.364095169430425, "grad_norm": 0.12350351363420486, "learning_rate": 9.735597648730718e-05, "loss": 0.0445, "step": 7440 }, { "epoch": 5.371304974765681, "grad_norm": 0.1528310626745224, "learning_rate": 9.734535479057262e-05, "loss": 0.0486, "step": 7450 }, { "epoch": 5.378514780100938, "grad_norm": 0.14609383046627045, "learning_rate": 9.733471238337136e-05, "loss": 0.0411, "step": 7460 }, { "epoch": 5.3857245854361935, "grad_norm": 0.16386030614376068, "learning_rate": 9.732404927035871e-05, "loss": 0.0519, "step": 7470 }, { "epoch": 5.392934390771449, "grad_norm": 0.1795184314250946, "learning_rate": 9.731336545619915e-05, "loss": 0.0444, "step": 7480 }, { "epoch": 5.400144196106705, "grad_norm": 0.1311633586883545, "learning_rate": 9.73026609455661e-05, "loss": 0.0492, "step": 7490 }, { "epoch": 5.407354001441961, "grad_norm": 0.1920330822467804, "learning_rate": 9.72919357431421e-05, "loss": 0.0518, "step": 7500 }, { "epoch": 5.414563806777217, "grad_norm": 0.17452634871006012, "learning_rate": 9.72811898536187e-05, "loss": 0.0473, "step": 7510 }, { "epoch": 5.421773612112473, "grad_norm": 0.18838174641132355, "learning_rate": 9.72704232816965e-05, "loss": 0.0443, "step": 7520 }, { "epoch": 5.4289834174477285, "grad_norm": 0.16849026083946228, "learning_rate": 9.725963603208519e-05, "loss": 0.0486, "step": 7530 }, { "epoch": 5.436193222782985, "grad_norm": 0.1430199295282364, "learning_rate": 9.724882810950348e-05, "loss": 0.0493, "step": 7540 }, { "epoch": 5.443403028118241, "grad_norm": 0.17742541432380676, "learning_rate": 9.723799951867913e-05, "loss": 0.0416, "step": 7550 }, { "epoch": 5.450612833453497, "grad_norm": 0.14732041954994202, "learning_rate": 9.722715026434889e-05, "loss": 0.0429, "step": 7560 }, { "epoch": 5.457822638788753, "grad_norm": 0.19620446860790253, "learning_rate": 9.721628035125866e-05, "loss": 0.0473, "step": 7570 }, { "epoch": 5.465032444124009, "grad_norm": 0.20316976308822632, "learning_rate": 9.720538978416325e-05, "loss": 0.059, "step": 7580 }, { "epoch": 5.472242249459264, "grad_norm": 0.2322544902563095, "learning_rate": 9.71944785678266e-05, "loss": 0.0552, "step": 7590 }, { "epoch": 5.47945205479452, "grad_norm": 0.20567038655281067, "learning_rate": 9.718354670702161e-05, "loss": 0.0531, "step": 7600 }, { "epoch": 5.486661860129776, "grad_norm": 0.18127793073654175, "learning_rate": 9.717259420653029e-05, "loss": 0.0441, "step": 7610 }, { "epoch": 5.493871665465033, "grad_norm": 0.14843401312828064, "learning_rate": 9.716162107114361e-05, "loss": 0.0483, "step": 7620 }, { "epoch": 5.501081470800289, "grad_norm": 0.16953600943088531, "learning_rate": 9.71506273056616e-05, "loss": 0.0475, "step": 7630 }, { "epoch": 5.5082912761355445, "grad_norm": 0.14634445309638977, "learning_rate": 9.713961291489331e-05, "loss": 0.046, "step": 7640 }, { "epoch": 5.5155010814708, "grad_norm": 0.12240735441446304, "learning_rate": 9.71285779036568e-05, "loss": 0.045, "step": 7650 }, { "epoch": 5.522710886806056, "grad_norm": 0.16823828220367432, "learning_rate": 9.711752227677916e-05, "loss": 0.0503, "step": 7660 }, { "epoch": 5.529920692141312, "grad_norm": 0.2309502810239792, "learning_rate": 9.710644603909649e-05, "loss": 0.0488, "step": 7670 }, { "epoch": 5.537130497476568, "grad_norm": 0.1956491619348526, "learning_rate": 9.709534919545393e-05, "loss": 0.0469, "step": 7680 }, { "epoch": 5.544340302811824, "grad_norm": 0.1404716968536377, "learning_rate": 9.708423175070563e-05, "loss": 0.0468, "step": 7690 }, { "epoch": 5.55155010814708, "grad_norm": 0.1668831706047058, "learning_rate": 9.707309370971468e-05, "loss": 0.0441, "step": 7700 }, { "epoch": 5.558759913482336, "grad_norm": 0.1557563841342926, "learning_rate": 9.70619350773533e-05, "loss": 0.0417, "step": 7710 }, { "epoch": 5.565969718817592, "grad_norm": 0.13992707431316376, "learning_rate": 9.70507558585026e-05, "loss": 0.0423, "step": 7720 }, { "epoch": 5.573179524152848, "grad_norm": 0.2044336199760437, "learning_rate": 9.703955605805281e-05, "loss": 0.0477, "step": 7730 }, { "epoch": 5.580389329488104, "grad_norm": 0.11911238729953766, "learning_rate": 9.702833568090306e-05, "loss": 0.0444, "step": 7740 }, { "epoch": 5.58759913482336, "grad_norm": 0.19541393220424652, "learning_rate": 9.701709473196154e-05, "loss": 0.0449, "step": 7750 }, { "epoch": 5.594808940158615, "grad_norm": 0.22896307706832886, "learning_rate": 9.700583321614541e-05, "loss": 0.0454, "step": 7760 }, { "epoch": 5.602018745493872, "grad_norm": 0.2140805870294571, "learning_rate": 9.699455113838085e-05, "loss": 0.043, "step": 7770 }, { "epoch": 5.609228550829128, "grad_norm": 0.14072799682617188, "learning_rate": 9.698324850360303e-05, "loss": 0.0486, "step": 7780 }, { "epoch": 5.616438356164384, "grad_norm": 0.17901231348514557, "learning_rate": 9.69719253167561e-05, "loss": 0.0506, "step": 7790 }, { "epoch": 5.62364816149964, "grad_norm": 0.10481642931699753, "learning_rate": 9.69605815827932e-05, "loss": 0.0432, "step": 7800 }, { "epoch": 5.6308579668348955, "grad_norm": 0.1902894675731659, "learning_rate": 9.694921730667647e-05, "loss": 0.046, "step": 7810 }, { "epoch": 5.638067772170151, "grad_norm": 0.229942187666893, "learning_rate": 9.693783249337705e-05, "loss": 0.0514, "step": 7820 }, { "epoch": 5.645277577505407, "grad_norm": 0.20562885701656342, "learning_rate": 9.692642714787501e-05, "loss": 0.054, "step": 7830 }, { "epoch": 5.652487382840663, "grad_norm": 0.24603232741355896, "learning_rate": 9.691500127515945e-05, "loss": 0.0456, "step": 7840 }, { "epoch": 5.659697188175919, "grad_norm": 0.16113124787807465, "learning_rate": 9.690355488022844e-05, "loss": 0.0472, "step": 7850 }, { "epoch": 5.6669069935111755, "grad_norm": 0.13945691287517548, "learning_rate": 9.689208796808902e-05, "loss": 0.0431, "step": 7860 }, { "epoch": 5.674116798846431, "grad_norm": 0.16482126712799072, "learning_rate": 9.68806005437572e-05, "loss": 0.0464, "step": 7870 }, { "epoch": 5.681326604181687, "grad_norm": 0.12832143902778625, "learning_rate": 9.686909261225796e-05, "loss": 0.0442, "step": 7880 }, { "epoch": 5.688536409516943, "grad_norm": 0.13845741748809814, "learning_rate": 9.685756417862531e-05, "loss": 0.0532, "step": 7890 }, { "epoch": 5.695746214852199, "grad_norm": 0.14171598851680756, "learning_rate": 9.684601524790212e-05, "loss": 0.0448, "step": 7900 }, { "epoch": 5.702956020187455, "grad_norm": 0.13420496881008148, "learning_rate": 9.68344458251403e-05, "loss": 0.047, "step": 7910 }, { "epoch": 5.710165825522711, "grad_norm": 0.16180935502052307, "learning_rate": 9.682285591540072e-05, "loss": 0.0487, "step": 7920 }, { "epoch": 5.717375630857967, "grad_norm": 0.11420930176973343, "learning_rate": 9.681124552375322e-05, "loss": 0.039, "step": 7930 }, { "epoch": 5.724585436193223, "grad_norm": 0.15575748682022095, "learning_rate": 9.679961465527654e-05, "loss": 0.0452, "step": 7940 }, { "epoch": 5.731795241528479, "grad_norm": 0.21025165915489197, "learning_rate": 9.678796331505843e-05, "loss": 0.0508, "step": 7950 }, { "epoch": 5.739005046863735, "grad_norm": 0.19661934673786163, "learning_rate": 9.677629150819558e-05, "loss": 0.0518, "step": 7960 }, { "epoch": 5.746214852198991, "grad_norm": 0.17563681304454803, "learning_rate": 9.676459923979366e-05, "loss": 0.0477, "step": 7970 }, { "epoch": 5.7534246575342465, "grad_norm": 0.18979130685329437, "learning_rate": 9.675288651496723e-05, "loss": 0.0499, "step": 7980 }, { "epoch": 5.760634462869502, "grad_norm": 0.16382010281085968, "learning_rate": 9.674115333883986e-05, "loss": 0.0522, "step": 7990 }, { "epoch": 5.767844268204758, "grad_norm": 0.16383595764636993, "learning_rate": 9.672939971654402e-05, "loss": 0.0458, "step": 8000 }, { "epoch": 5.775054073540014, "grad_norm": 0.11292503774166107, "learning_rate": 9.671762565322117e-05, "loss": 0.0437, "step": 8010 }, { "epoch": 5.782263878875271, "grad_norm": 0.1309163123369217, "learning_rate": 9.670583115402166e-05, "loss": 0.0467, "step": 8020 }, { "epoch": 5.7894736842105265, "grad_norm": 0.17481744289398193, "learning_rate": 9.669401622410482e-05, "loss": 0.0474, "step": 8030 }, { "epoch": 5.796683489545782, "grad_norm": 0.10601960122585297, "learning_rate": 9.668218086863887e-05, "loss": 0.0432, "step": 8040 }, { "epoch": 5.803893294881038, "grad_norm": 0.11381493508815765, "learning_rate": 9.667032509280103e-05, "loss": 0.0429, "step": 8050 }, { "epoch": 5.811103100216294, "grad_norm": 0.1482740342617035, "learning_rate": 9.665844890177743e-05, "loss": 0.0455, "step": 8060 }, { "epoch": 5.81831290555155, "grad_norm": 0.14296264946460724, "learning_rate": 9.664655230076309e-05, "loss": 0.0523, "step": 8070 }, { "epoch": 5.825522710886806, "grad_norm": 0.22824065387248993, "learning_rate": 9.663463529496199e-05, "loss": 0.0456, "step": 8080 }, { "epoch": 5.8327325162220625, "grad_norm": 0.13579711318016052, "learning_rate": 9.662269788958705e-05, "loss": 0.0408, "step": 8090 }, { "epoch": 5.839942321557318, "grad_norm": 0.15502944588661194, "learning_rate": 9.661074008986008e-05, "loss": 0.0455, "step": 8100 }, { "epoch": 5.847152126892574, "grad_norm": 0.17692695558071136, "learning_rate": 9.659876190101184e-05, "loss": 0.044, "step": 8110 }, { "epoch": 5.85436193222783, "grad_norm": 0.2028454691171646, "learning_rate": 9.6586763328282e-05, "loss": 0.0427, "step": 8120 }, { "epoch": 5.861571737563086, "grad_norm": 0.14011463522911072, "learning_rate": 9.657474437691913e-05, "loss": 0.0465, "step": 8130 }, { "epoch": 5.868781542898342, "grad_norm": 0.16014635562896729, "learning_rate": 9.656270505218073e-05, "loss": 0.0426, "step": 8140 }, { "epoch": 5.8759913482335975, "grad_norm": 0.1986118108034134, "learning_rate": 9.655064535933321e-05, "loss": 0.0517, "step": 8150 }, { "epoch": 5.883201153568853, "grad_norm": 0.13810932636260986, "learning_rate": 9.653856530365189e-05, "loss": 0.0438, "step": 8160 }, { "epoch": 5.890410958904109, "grad_norm": 0.16285566985607147, "learning_rate": 9.6526464890421e-05, "loss": 0.0475, "step": 8170 }, { "epoch": 5.897620764239366, "grad_norm": 0.16465730965137482, "learning_rate": 9.651434412493367e-05, "loss": 0.0467, "step": 8180 }, { "epoch": 5.904830569574622, "grad_norm": 0.12811750173568726, "learning_rate": 9.650220301249195e-05, "loss": 0.0502, "step": 8190 }, { "epoch": 5.9120403749098775, "grad_norm": 0.1591353416442871, "learning_rate": 9.649004155840675e-05, "loss": 0.0514, "step": 8200 }, { "epoch": 5.919250180245133, "grad_norm": 0.12089364975690842, "learning_rate": 9.647785976799792e-05, "loss": 0.0426, "step": 8210 }, { "epoch": 5.926459985580389, "grad_norm": 0.14575766026973724, "learning_rate": 9.646565764659417e-05, "loss": 0.0498, "step": 8220 }, { "epoch": 5.933669790915645, "grad_norm": 0.15653273463249207, "learning_rate": 9.645343519953314e-05, "loss": 0.0454, "step": 8230 }, { "epoch": 5.940879596250901, "grad_norm": 0.16614729166030884, "learning_rate": 9.644119243216135e-05, "loss": 0.0472, "step": 8240 }, { "epoch": 5.948089401586158, "grad_norm": 0.18760310113430023, "learning_rate": 9.642892934983417e-05, "loss": 0.0419, "step": 8250 }, { "epoch": 5.9552992069214135, "grad_norm": 0.19401417672634125, "learning_rate": 9.641664595791593e-05, "loss": 0.045, "step": 8260 }, { "epoch": 5.962509012256669, "grad_norm": 0.13059768080711365, "learning_rate": 9.640434226177977e-05, "loss": 0.0496, "step": 8270 }, { "epoch": 5.969718817591925, "grad_norm": 0.14102578163146973, "learning_rate": 9.639201826680777e-05, "loss": 0.0526, "step": 8280 }, { "epoch": 5.976928622927181, "grad_norm": 0.14239242672920227, "learning_rate": 9.637967397839083e-05, "loss": 0.047, "step": 8290 }, { "epoch": 5.984138428262437, "grad_norm": 0.1495516151189804, "learning_rate": 9.63673094019288e-05, "loss": 0.0414, "step": 8300 }, { "epoch": 5.991348233597693, "grad_norm": 0.16398315131664276, "learning_rate": 9.635492454283035e-05, "loss": 0.0501, "step": 8310 }, { "epoch": 5.9985580389329485, "grad_norm": 0.14243890345096588, "learning_rate": 9.634251940651302e-05, "loss": 0.0512, "step": 8320 }, { "epoch": 6.005767844268204, "grad_norm": 0.19458752870559692, "learning_rate": 9.633009399840327e-05, "loss": 0.0472, "step": 8330 }, { "epoch": 6.012977649603461, "grad_norm": 0.16707323491573334, "learning_rate": 9.631764832393639e-05, "loss": 0.045, "step": 8340 }, { "epoch": 6.020187454938717, "grad_norm": 0.22155843675136566, "learning_rate": 9.630518238855651e-05, "loss": 0.0455, "step": 8350 }, { "epoch": 6.027397260273973, "grad_norm": 0.18223820626735687, "learning_rate": 9.629269619771668e-05, "loss": 0.0436, "step": 8360 }, { "epoch": 6.0346070656092285, "grad_norm": 0.15878844261169434, "learning_rate": 9.628018975687878e-05, "loss": 0.0428, "step": 8370 }, { "epoch": 6.041816870944484, "grad_norm": 0.16510401666164398, "learning_rate": 9.626766307151355e-05, "loss": 0.0456, "step": 8380 }, { "epoch": 6.04902667627974, "grad_norm": 0.12214291840791702, "learning_rate": 9.62551161471006e-05, "loss": 0.0489, "step": 8390 }, { "epoch": 6.056236481614996, "grad_norm": 0.11704742163419724, "learning_rate": 9.624254898912837e-05, "loss": 0.0452, "step": 8400 }, { "epoch": 6.063446286950253, "grad_norm": 0.1671050786972046, "learning_rate": 9.622996160309414e-05, "loss": 0.0424, "step": 8410 }, { "epoch": 6.070656092285509, "grad_norm": 0.17762961983680725, "learning_rate": 9.62173539945041e-05, "loss": 0.0455, "step": 8420 }, { "epoch": 6.0778658976207645, "grad_norm": 0.1599046289920807, "learning_rate": 9.620472616887323e-05, "loss": 0.04, "step": 8430 }, { "epoch": 6.08507570295602, "grad_norm": 0.16985882818698883, "learning_rate": 9.619207813172536e-05, "loss": 0.0407, "step": 8440 }, { "epoch": 6.092285508291276, "grad_norm": 0.1445522904396057, "learning_rate": 9.617940988859318e-05, "loss": 0.0413, "step": 8450 }, { "epoch": 6.099495313626532, "grad_norm": 0.10539346188306808, "learning_rate": 9.616672144501821e-05, "loss": 0.0409, "step": 8460 }, { "epoch": 6.106705118961788, "grad_norm": 0.1761390119791031, "learning_rate": 9.615401280655081e-05, "loss": 0.0441, "step": 8470 }, { "epoch": 6.113914924297044, "grad_norm": 0.15678349137306213, "learning_rate": 9.614128397875017e-05, "loss": 0.0379, "step": 8480 }, { "epoch": 6.1211247296322995, "grad_norm": 0.19255134463310242, "learning_rate": 9.612853496718429e-05, "loss": 0.0595, "step": 8490 }, { "epoch": 6.128334534967556, "grad_norm": 0.14806197583675385, "learning_rate": 9.611576577743004e-05, "loss": 0.038, "step": 8500 }, { "epoch": 6.135544340302812, "grad_norm": 0.19901494681835175, "learning_rate": 9.610297641507313e-05, "loss": 0.0469, "step": 8510 }, { "epoch": 6.142754145638068, "grad_norm": 0.13995501399040222, "learning_rate": 9.6090166885708e-05, "loss": 0.047, "step": 8520 }, { "epoch": 6.149963950973324, "grad_norm": 0.1443891078233719, "learning_rate": 9.607733719493798e-05, "loss": 0.0424, "step": 8530 }, { "epoch": 6.1571737563085795, "grad_norm": 0.19468629360198975, "learning_rate": 9.606448734837527e-05, "loss": 0.041, "step": 8540 }, { "epoch": 6.164383561643835, "grad_norm": 0.08396469056606293, "learning_rate": 9.605161735164079e-05, "loss": 0.0449, "step": 8550 }, { "epoch": 6.171593366979091, "grad_norm": 0.2472797930240631, "learning_rate": 9.60387272103643e-05, "loss": 0.047, "step": 8560 }, { "epoch": 6.178803172314348, "grad_norm": 0.08976095914840698, "learning_rate": 9.60258169301844e-05, "loss": 0.0379, "step": 8570 }, { "epoch": 6.186012977649604, "grad_norm": 0.1304633915424347, "learning_rate": 9.601288651674851e-05, "loss": 0.0465, "step": 8580 }, { "epoch": 6.19322278298486, "grad_norm": 0.17906349897384644, "learning_rate": 9.59999359757128e-05, "loss": 0.0451, "step": 8590 }, { "epoch": 6.2004325883201155, "grad_norm": 0.14452728629112244, "learning_rate": 9.598696531274227e-05, "loss": 0.0452, "step": 8600 }, { "epoch": 6.207642393655371, "grad_norm": 0.1541965752840042, "learning_rate": 9.597397453351076e-05, "loss": 0.0462, "step": 8610 }, { "epoch": 6.214852198990627, "grad_norm": 0.1422322541475296, "learning_rate": 9.596096364370087e-05, "loss": 0.0451, "step": 8620 }, { "epoch": 6.222062004325883, "grad_norm": 0.16331785917282104, "learning_rate": 9.594793264900399e-05, "loss": 0.0391, "step": 8630 }, { "epoch": 6.229271809661139, "grad_norm": 0.1459251046180725, "learning_rate": 9.593488155512032e-05, "loss": 0.0428, "step": 8640 }, { "epoch": 6.236481614996395, "grad_norm": 0.12833479046821594, "learning_rate": 9.592181036775886e-05, "loss": 0.0443, "step": 8650 }, { "epoch": 6.243691420331651, "grad_norm": 0.16422095894813538, "learning_rate": 9.590871909263741e-05, "loss": 0.0456, "step": 8660 }, { "epoch": 6.250901225666907, "grad_norm": 0.13764287531375885, "learning_rate": 9.589560773548252e-05, "loss": 0.0471, "step": 8670 }, { "epoch": 6.258111031002163, "grad_norm": 0.16352051496505737, "learning_rate": 9.588247630202952e-05, "loss": 0.0496, "step": 8680 }, { "epoch": 6.265320836337419, "grad_norm": 0.15415631234645844, "learning_rate": 9.586932479802258e-05, "loss": 0.0475, "step": 8690 }, { "epoch": 6.272530641672675, "grad_norm": 0.14822280406951904, "learning_rate": 9.585615322921462e-05, "loss": 0.0434, "step": 8700 }, { "epoch": 6.2797404470079305, "grad_norm": 0.2211928814649582, "learning_rate": 9.58429616013673e-05, "loss": 0.0422, "step": 8710 }, { "epoch": 6.286950252343186, "grad_norm": 0.17235185205936432, "learning_rate": 9.58297499202511e-05, "loss": 0.0449, "step": 8720 }, { "epoch": 6.294160057678443, "grad_norm": 0.1866975575685501, "learning_rate": 9.581651819164525e-05, "loss": 0.0502, "step": 8730 }, { "epoch": 6.301369863013699, "grad_norm": 0.21170639991760254, "learning_rate": 9.580326642133777e-05, "loss": 0.0432, "step": 8740 }, { "epoch": 6.308579668348955, "grad_norm": 0.17629501223564148, "learning_rate": 9.578999461512544e-05, "loss": 0.053, "step": 8750 }, { "epoch": 6.315789473684211, "grad_norm": 0.1864372044801712, "learning_rate": 9.577670277881378e-05, "loss": 0.0471, "step": 8760 }, { "epoch": 6.3229992790194665, "grad_norm": 0.17137596011161804, "learning_rate": 9.57633909182171e-05, "loss": 0.045, "step": 8770 }, { "epoch": 6.330209084354722, "grad_norm": 0.15789373219013214, "learning_rate": 9.575005903915847e-05, "loss": 0.0461, "step": 8780 }, { "epoch": 6.337418889689978, "grad_norm": 0.1758284866809845, "learning_rate": 9.573670714746972e-05, "loss": 0.0552, "step": 8790 }, { "epoch": 6.344628695025234, "grad_norm": 0.16164623200893402, "learning_rate": 9.572333524899136e-05, "loss": 0.0467, "step": 8800 }, { "epoch": 6.35183850036049, "grad_norm": 0.11156054586172104, "learning_rate": 9.570994334957278e-05, "loss": 0.05, "step": 8810 }, { "epoch": 6.3590483056957465, "grad_norm": 0.16475297510623932, "learning_rate": 9.569653145507201e-05, "loss": 0.0382, "step": 8820 }, { "epoch": 6.366258111031002, "grad_norm": 0.16826355457305908, "learning_rate": 9.568309957135587e-05, "loss": 0.0519, "step": 8830 }, { "epoch": 6.373467916366258, "grad_norm": 0.16514120995998383, "learning_rate": 9.566964770429995e-05, "loss": 0.0383, "step": 8840 }, { "epoch": 6.380677721701514, "grad_norm": 0.1746404469013214, "learning_rate": 9.565617585978853e-05, "loss": 0.048, "step": 8850 }, { "epoch": 6.38788752703677, "grad_norm": 0.17793455719947815, "learning_rate": 9.564268404371466e-05, "loss": 0.0508, "step": 8860 }, { "epoch": 6.395097332372026, "grad_norm": 0.1469440907239914, "learning_rate": 9.562917226198013e-05, "loss": 0.0406, "step": 8870 }, { "epoch": 6.4023071377072815, "grad_norm": 0.20800550282001495, "learning_rate": 9.561564052049543e-05, "loss": 0.0448, "step": 8880 }, { "epoch": 6.409516943042538, "grad_norm": 0.12700048089027405, "learning_rate": 9.560208882517982e-05, "loss": 0.0458, "step": 8890 }, { "epoch": 6.416726748377794, "grad_norm": 0.16068169474601746, "learning_rate": 9.558851718196127e-05, "loss": 0.0386, "step": 8900 }, { "epoch": 6.42393655371305, "grad_norm": 0.2035587728023529, "learning_rate": 9.557492559677646e-05, "loss": 0.0494, "step": 8910 }, { "epoch": 6.431146359048306, "grad_norm": 0.1656816154718399, "learning_rate": 9.556131407557082e-05, "loss": 0.0444, "step": 8920 }, { "epoch": 6.438356164383562, "grad_norm": 0.17842994630336761, "learning_rate": 9.554768262429853e-05, "loss": 0.0425, "step": 8930 }, { "epoch": 6.4455659697188175, "grad_norm": 0.17194123566150665, "learning_rate": 9.553403124892239e-05, "loss": 0.043, "step": 8940 }, { "epoch": 6.452775775054073, "grad_norm": 0.20823581516742706, "learning_rate": 9.552035995541402e-05, "loss": 0.0493, "step": 8950 }, { "epoch": 6.459985580389329, "grad_norm": 0.152860626578331, "learning_rate": 9.550666874975368e-05, "loss": 0.0521, "step": 8960 }, { "epoch": 6.467195385724585, "grad_norm": 0.15173658728599548, "learning_rate": 9.549295763793038e-05, "loss": 0.0432, "step": 8970 }, { "epoch": 6.474405191059842, "grad_norm": 0.15476107597351074, "learning_rate": 9.547922662594183e-05, "loss": 0.0449, "step": 8980 }, { "epoch": 6.4816149963950975, "grad_norm": 0.17680472135543823, "learning_rate": 9.546547571979443e-05, "loss": 0.0445, "step": 8990 }, { "epoch": 6.488824801730353, "grad_norm": 0.1775486320257187, "learning_rate": 9.545170492550331e-05, "loss": 0.044, "step": 9000 }, { "epoch": 6.496034607065609, "grad_norm": 0.11239801347255707, "learning_rate": 9.543791424909226e-05, "loss": 0.0394, "step": 9010 }, { "epoch": 6.503244412400865, "grad_norm": 0.20145682990550995, "learning_rate": 9.542410369659382e-05, "loss": 0.0462, "step": 9020 }, { "epoch": 6.510454217736121, "grad_norm": 0.14365065097808838, "learning_rate": 9.541027327404916e-05, "loss": 0.0431, "step": 9030 }, { "epoch": 6.517664023071377, "grad_norm": 0.1282082200050354, "learning_rate": 9.539642298750821e-05, "loss": 0.037, "step": 9040 }, { "epoch": 6.524873828406633, "grad_norm": 0.16227994859218597, "learning_rate": 9.538255284302954e-05, "loss": 0.0454, "step": 9050 }, { "epoch": 6.532083633741889, "grad_norm": 0.184687077999115, "learning_rate": 9.536866284668043e-05, "loss": 0.0467, "step": 9060 }, { "epoch": 6.539293439077145, "grad_norm": 0.15199704468250275, "learning_rate": 9.53547530045368e-05, "loss": 0.0383, "step": 9070 }, { "epoch": 6.546503244412401, "grad_norm": 0.2328799068927765, "learning_rate": 9.534082332268335e-05, "loss": 0.0515, "step": 9080 }, { "epoch": 6.553713049747657, "grad_norm": 0.1813650280237198, "learning_rate": 9.532687380721334e-05, "loss": 0.0365, "step": 9090 }, { "epoch": 6.560922855082913, "grad_norm": 0.1535281538963318, "learning_rate": 9.53129044642288e-05, "loss": 0.0426, "step": 9100 }, { "epoch": 6.5681326604181685, "grad_norm": 0.14241455495357513, "learning_rate": 9.529891529984039e-05, "loss": 0.0447, "step": 9110 }, { "epoch": 6.575342465753424, "grad_norm": 0.13631868362426758, "learning_rate": 9.528490632016743e-05, "loss": 0.0489, "step": 9120 }, { "epoch": 6.58255227108868, "grad_norm": 0.1645643264055252, "learning_rate": 9.52708775313379e-05, "loss": 0.0433, "step": 9130 }, { "epoch": 6.589762076423937, "grad_norm": 0.2209136039018631, "learning_rate": 9.525682893948853e-05, "loss": 0.045, "step": 9140 }, { "epoch": 6.596971881759193, "grad_norm": 0.15763860940933228, "learning_rate": 9.524276055076463e-05, "loss": 0.0468, "step": 9150 }, { "epoch": 6.6041816870944485, "grad_norm": 0.19221651554107666, "learning_rate": 9.522867237132016e-05, "loss": 0.047, "step": 9160 }, { "epoch": 6.611391492429704, "grad_norm": 0.18232111632823944, "learning_rate": 9.52145644073178e-05, "loss": 0.0409, "step": 9170 }, { "epoch": 6.61860129776496, "grad_norm": 0.15113626420497894, "learning_rate": 9.520043666492884e-05, "loss": 0.0479, "step": 9180 }, { "epoch": 6.625811103100216, "grad_norm": 0.16314327716827393, "learning_rate": 9.518628915033323e-05, "loss": 0.0436, "step": 9190 }, { "epoch": 6.633020908435472, "grad_norm": 0.131260484457016, "learning_rate": 9.517212186971957e-05, "loss": 0.0498, "step": 9200 }, { "epoch": 6.640230713770729, "grad_norm": 0.16551826894283295, "learning_rate": 9.515793482928515e-05, "loss": 0.0463, "step": 9210 }, { "epoch": 6.647440519105984, "grad_norm": 0.16666804254055023, "learning_rate": 9.514372803523582e-05, "loss": 0.05, "step": 9220 }, { "epoch": 6.65465032444124, "grad_norm": 0.20504240691661835, "learning_rate": 9.512950149378613e-05, "loss": 0.0469, "step": 9230 }, { "epoch": 6.661860129776496, "grad_norm": 0.1280423253774643, "learning_rate": 9.511525521115925e-05, "loss": 0.0408, "step": 9240 }, { "epoch": 6.669069935111752, "grad_norm": 0.1748305857181549, "learning_rate": 9.510098919358698e-05, "loss": 0.0436, "step": 9250 }, { "epoch": 6.676279740447008, "grad_norm": 0.1729923039674759, "learning_rate": 9.508670344730979e-05, "loss": 0.0494, "step": 9260 }, { "epoch": 6.683489545782264, "grad_norm": 0.11632757633924484, "learning_rate": 9.507239797857672e-05, "loss": 0.0461, "step": 9270 }, { "epoch": 6.6906993511175195, "grad_norm": 0.20830240845680237, "learning_rate": 9.505807279364548e-05, "loss": 0.0431, "step": 9280 }, { "epoch": 6.697909156452775, "grad_norm": 0.1535077691078186, "learning_rate": 9.504372789878239e-05, "loss": 0.0392, "step": 9290 }, { "epoch": 6.705118961788032, "grad_norm": 0.20325760543346405, "learning_rate": 9.502936330026239e-05, "loss": 0.0443, "step": 9300 }, { "epoch": 6.712328767123288, "grad_norm": 0.19764460623264313, "learning_rate": 9.501497900436906e-05, "loss": 0.041, "step": 9310 }, { "epoch": 6.719538572458544, "grad_norm": 0.2003001868724823, "learning_rate": 9.500057501739457e-05, "loss": 0.0438, "step": 9320 }, { "epoch": 6.7267483777937995, "grad_norm": 0.20760610699653625, "learning_rate": 9.49861513456397e-05, "loss": 0.0443, "step": 9330 }, { "epoch": 6.733958183129055, "grad_norm": 0.1370125412940979, "learning_rate": 9.497170799541388e-05, "loss": 0.0516, "step": 9340 }, { "epoch": 6.741167988464311, "grad_norm": 0.15027716755867004, "learning_rate": 9.495724497303508e-05, "loss": 0.0409, "step": 9350 }, { "epoch": 6.748377793799567, "grad_norm": 0.16602197289466858, "learning_rate": 9.494276228482998e-05, "loss": 0.043, "step": 9360 }, { "epoch": 6.755587599134824, "grad_norm": 0.18502403795719147, "learning_rate": 9.492825993713374e-05, "loss": 0.0496, "step": 9370 }, { "epoch": 6.76279740447008, "grad_norm": 0.19086089730262756, "learning_rate": 9.491373793629023e-05, "loss": 0.0458, "step": 9380 }, { "epoch": 6.770007209805335, "grad_norm": 0.18573275208473206, "learning_rate": 9.489919628865182e-05, "loss": 0.0493, "step": 9390 }, { "epoch": 6.777217015140591, "grad_norm": 0.17771920561790466, "learning_rate": 9.488463500057955e-05, "loss": 0.0418, "step": 9400 }, { "epoch": 6.784426820475847, "grad_norm": 0.14360691606998444, "learning_rate": 9.487005407844302e-05, "loss": 0.041, "step": 9410 }, { "epoch": 6.791636625811103, "grad_norm": 0.1524164229631424, "learning_rate": 9.485545352862039e-05, "loss": 0.0459, "step": 9420 }, { "epoch": 6.798846431146359, "grad_norm": 0.19278207421302795, "learning_rate": 9.48408333574985e-05, "loss": 0.0379, "step": 9430 }, { "epoch": 6.806056236481615, "grad_norm": 0.18740519881248474, "learning_rate": 9.482619357147264e-05, "loss": 0.0443, "step": 9440 }, { "epoch": 6.8132660418168705, "grad_norm": 0.13222698867321014, "learning_rate": 9.481153417694679e-05, "loss": 0.0516, "step": 9450 }, { "epoch": 6.820475847152127, "grad_norm": 0.162553071975708, "learning_rate": 9.479685518033347e-05, "loss": 0.0412, "step": 9460 }, { "epoch": 6.827685652487383, "grad_norm": 0.15002034604549408, "learning_rate": 9.478215658805375e-05, "loss": 0.0478, "step": 9470 }, { "epoch": 6.834895457822639, "grad_norm": 0.16687561571598053, "learning_rate": 9.47674384065373e-05, "loss": 0.0465, "step": 9480 }, { "epoch": 6.842105263157895, "grad_norm": 0.1717001497745514, "learning_rate": 9.475270064222237e-05, "loss": 0.0479, "step": 9490 }, { "epoch": 6.8493150684931505, "grad_norm": 0.1480223834514618, "learning_rate": 9.473794330155572e-05, "loss": 0.0391, "step": 9500 }, { "epoch": 6.856524873828406, "grad_norm": 0.18215139210224152, "learning_rate": 9.472316639099276e-05, "loss": 0.0501, "step": 9510 }, { "epoch": 6.863734679163662, "grad_norm": 0.1685115396976471, "learning_rate": 9.470836991699739e-05, "loss": 0.0408, "step": 9520 }, { "epoch": 6.870944484498919, "grad_norm": 0.14659228920936584, "learning_rate": 9.469355388604208e-05, "loss": 0.0403, "step": 9530 }, { "epoch": 6.878154289834175, "grad_norm": 0.16239288449287415, "learning_rate": 9.467871830460787e-05, "loss": 0.0469, "step": 9540 }, { "epoch": 6.885364095169431, "grad_norm": 0.1551590859889984, "learning_rate": 9.466386317918436e-05, "loss": 0.0432, "step": 9550 }, { "epoch": 6.892573900504686, "grad_norm": 0.11274154484272003, "learning_rate": 9.464898851626969e-05, "loss": 0.0437, "step": 9560 }, { "epoch": 6.899783705839942, "grad_norm": 0.1787160038948059, "learning_rate": 9.463409432237051e-05, "loss": 0.047, "step": 9570 }, { "epoch": 6.906993511175198, "grad_norm": 0.12776993215084076, "learning_rate": 9.461918060400209e-05, "loss": 0.043, "step": 9580 }, { "epoch": 6.914203316510454, "grad_norm": 0.12338434904813766, "learning_rate": 9.460424736768816e-05, "loss": 0.0429, "step": 9590 }, { "epoch": 6.92141312184571, "grad_norm": 0.14314596354961395, "learning_rate": 9.458929461996105e-05, "loss": 0.0375, "step": 9600 }, { "epoch": 6.928622927180966, "grad_norm": 0.19631962478160858, "learning_rate": 9.457432236736158e-05, "loss": 0.0459, "step": 9610 }, { "epoch": 6.935832732516222, "grad_norm": 0.15910379588603973, "learning_rate": 9.455933061643916e-05, "loss": 0.0459, "step": 9620 }, { "epoch": 6.943042537851478, "grad_norm": 0.13867276906967163, "learning_rate": 9.454431937375164e-05, "loss": 0.0503, "step": 9630 }, { "epoch": 6.950252343186734, "grad_norm": 0.1340874880552292, "learning_rate": 9.452928864586547e-05, "loss": 0.0426, "step": 9640 }, { "epoch": 6.95746214852199, "grad_norm": 0.1765204519033432, "learning_rate": 9.451423843935563e-05, "loss": 0.0446, "step": 9650 }, { "epoch": 6.964671953857246, "grad_norm": 0.1775377094745636, "learning_rate": 9.449916876080553e-05, "loss": 0.0507, "step": 9660 }, { "epoch": 6.9718817591925015, "grad_norm": 0.17200933396816254, "learning_rate": 9.44840796168072e-05, "loss": 0.047, "step": 9670 }, { "epoch": 6.979091564527757, "grad_norm": 0.20372715592384338, "learning_rate": 9.446897101396115e-05, "loss": 0.0485, "step": 9680 }, { "epoch": 6.986301369863014, "grad_norm": 0.16005262732505798, "learning_rate": 9.445384295887638e-05, "loss": 0.0393, "step": 9690 }, { "epoch": 6.99351117519827, "grad_norm": 0.1736975908279419, "learning_rate": 9.443869545817043e-05, "loss": 0.0433, "step": 9700 }, { "epoch": 7.000720980533526, "grad_norm": 0.12984897196292877, "learning_rate": 9.442352851846929e-05, "loss": 0.0428, "step": 9710 }, { "epoch": 7.007930785868782, "grad_norm": 0.13122859597206116, "learning_rate": 9.440834214640755e-05, "loss": 0.045, "step": 9720 }, { "epoch": 7.015140591204037, "grad_norm": 0.18020115792751312, "learning_rate": 9.439313634862823e-05, "loss": 0.0465, "step": 9730 }, { "epoch": 7.022350396539293, "grad_norm": 0.145697683095932, "learning_rate": 9.437791113178282e-05, "loss": 0.0471, "step": 9740 }, { "epoch": 7.029560201874549, "grad_norm": 0.17386209964752197, "learning_rate": 9.43626665025314e-05, "loss": 0.0453, "step": 9750 }, { "epoch": 7.036770007209805, "grad_norm": 0.19388525187969208, "learning_rate": 9.434740246754248e-05, "loss": 0.0424, "step": 9760 }, { "epoch": 7.043979812545062, "grad_norm": 0.18555738031864166, "learning_rate": 9.433211903349304e-05, "loss": 0.0414, "step": 9770 }, { "epoch": 7.0511896178803175, "grad_norm": 0.11555543541908264, "learning_rate": 9.431681620706858e-05, "loss": 0.041, "step": 9780 }, { "epoch": 7.058399423215573, "grad_norm": 0.14735670387744904, "learning_rate": 9.43014939949631e-05, "loss": 0.0322, "step": 9790 }, { "epoch": 7.065609228550829, "grad_norm": 0.12371181696653366, "learning_rate": 9.428615240387904e-05, "loss": 0.0458, "step": 9800 }, { "epoch": 7.072819033886085, "grad_norm": 0.16020932793617249, "learning_rate": 9.427079144052732e-05, "loss": 0.0411, "step": 9810 }, { "epoch": 7.080028839221341, "grad_norm": 0.13153843581676483, "learning_rate": 9.425541111162739e-05, "loss": 0.0422, "step": 9820 }, { "epoch": 7.087238644556597, "grad_norm": 0.16669784486293793, "learning_rate": 9.424001142390709e-05, "loss": 0.0435, "step": 9830 }, { "epoch": 7.0944484498918525, "grad_norm": 0.1554129421710968, "learning_rate": 9.422459238410277e-05, "loss": 0.0433, "step": 9840 }, { "epoch": 7.101658255227109, "grad_norm": 0.11810388416051865, "learning_rate": 9.420915399895926e-05, "loss": 0.0485, "step": 9850 }, { "epoch": 7.108868060562365, "grad_norm": 0.13054227828979492, "learning_rate": 9.419369627522981e-05, "loss": 0.0489, "step": 9860 }, { "epoch": 7.116077865897621, "grad_norm": 0.12693315744400024, "learning_rate": 9.417821921967618e-05, "loss": 0.0438, "step": 9870 }, { "epoch": 7.123287671232877, "grad_norm": 0.14149944484233856, "learning_rate": 9.416272283906855e-05, "loss": 0.0464, "step": 9880 }, { "epoch": 7.130497476568133, "grad_norm": 0.15977917611598969, "learning_rate": 9.414720714018554e-05, "loss": 0.0432, "step": 9890 }, { "epoch": 7.137707281903388, "grad_norm": 0.18820597231388092, "learning_rate": 9.413167212981427e-05, "loss": 0.0465, "step": 9900 }, { "epoch": 7.144917087238644, "grad_norm": 0.1506577581167221, "learning_rate": 9.41161178147503e-05, "loss": 0.0422, "step": 9910 }, { "epoch": 7.1521268925739, "grad_norm": 0.14088603854179382, "learning_rate": 9.410054420179755e-05, "loss": 0.0506, "step": 9920 }, { "epoch": 7.159336697909157, "grad_norm": 0.17582793533802032, "learning_rate": 9.408495129776851e-05, "loss": 0.0362, "step": 9930 }, { "epoch": 7.166546503244413, "grad_norm": 0.13561750948429108, "learning_rate": 9.406933910948403e-05, "loss": 0.0456, "step": 9940 }, { "epoch": 7.1737563085796685, "grad_norm": 0.167205810546875, "learning_rate": 9.40537076437734e-05, "loss": 0.0456, "step": 9950 }, { "epoch": 7.180966113914924, "grad_norm": 0.20598551630973816, "learning_rate": 9.403805690747436e-05, "loss": 0.0523, "step": 9960 }, { "epoch": 7.18817591925018, "grad_norm": 0.17913909256458282, "learning_rate": 9.402238690743308e-05, "loss": 0.0459, "step": 9970 }, { "epoch": 7.195385724585436, "grad_norm": 0.13325512409210205, "learning_rate": 9.400669765050413e-05, "loss": 0.0444, "step": 9980 }, { "epoch": 7.202595529920692, "grad_norm": 0.1195850595831871, "learning_rate": 9.399098914355055e-05, "loss": 0.0457, "step": 9990 }, { "epoch": 7.209805335255948, "grad_norm": 0.13285435736179352, "learning_rate": 9.397526139344378e-05, "loss": 0.0446, "step": 10000 }, { "epoch": 7.217015140591204, "grad_norm": 0.13566605746746063, "learning_rate": 9.395951440706362e-05, "loss": 0.0441, "step": 10010 }, { "epoch": 7.22422494592646, "grad_norm": 0.16160109639167786, "learning_rate": 9.394374819129839e-05, "loss": 0.0437, "step": 10020 }, { "epoch": 7.231434751261716, "grad_norm": 0.16503113508224487, "learning_rate": 9.392796275304474e-05, "loss": 0.0467, "step": 10030 }, { "epoch": 7.238644556596972, "grad_norm": 0.18493054807186127, "learning_rate": 9.391215809920778e-05, "loss": 0.0468, "step": 10040 }, { "epoch": 7.245854361932228, "grad_norm": 0.18994073569774628, "learning_rate": 9.389633423670099e-05, "loss": 0.0471, "step": 10050 }, { "epoch": 7.253064167267484, "grad_norm": 0.1670682281255722, "learning_rate": 9.388049117244626e-05, "loss": 0.0414, "step": 10060 }, { "epoch": 7.260273972602739, "grad_norm": 0.19963623583316803, "learning_rate": 9.386462891337389e-05, "loss": 0.0416, "step": 10070 }, { "epoch": 7.267483777937995, "grad_norm": 0.1487332284450531, "learning_rate": 9.384874746642257e-05, "loss": 0.0394, "step": 10080 }, { "epoch": 7.274693583273252, "grad_norm": 0.18729738891124725, "learning_rate": 9.383284683853937e-05, "loss": 0.0414, "step": 10090 }, { "epoch": 7.281903388608508, "grad_norm": 0.1102692186832428, "learning_rate": 9.381692703667981e-05, "loss": 0.0431, "step": 10100 }, { "epoch": 7.289113193943764, "grad_norm": 0.1602058708667755, "learning_rate": 9.380098806780771e-05, "loss": 0.0397, "step": 10110 }, { "epoch": 7.2963229992790195, "grad_norm": 0.15763600170612335, "learning_rate": 9.378502993889533e-05, "loss": 0.0427, "step": 10120 }, { "epoch": 7.303532804614275, "grad_norm": 0.18416275084018707, "learning_rate": 9.376905265692329e-05, "loss": 0.0453, "step": 10130 }, { "epoch": 7.310742609949531, "grad_norm": 0.13119474053382874, "learning_rate": 9.37530562288806e-05, "loss": 0.0452, "step": 10140 }, { "epoch": 7.317952415284787, "grad_norm": 0.13688917458057404, "learning_rate": 9.373704066176465e-05, "loss": 0.0426, "step": 10150 }, { "epoch": 7.325162220620043, "grad_norm": 0.13017615675926208, "learning_rate": 9.372100596258118e-05, "loss": 0.0412, "step": 10160 }, { "epoch": 7.3323720259553, "grad_norm": 0.14306701719760895, "learning_rate": 9.370495213834433e-05, "loss": 0.0407, "step": 10170 }, { "epoch": 7.339581831290555, "grad_norm": 0.1393345445394516, "learning_rate": 9.368887919607657e-05, "loss": 0.0429, "step": 10180 }, { "epoch": 7.346791636625811, "grad_norm": 0.14244526624679565, "learning_rate": 9.367278714280876e-05, "loss": 0.0402, "step": 10190 }, { "epoch": 7.354001441961067, "grad_norm": 0.16764132678508759, "learning_rate": 9.36566759855801e-05, "loss": 0.0445, "step": 10200 }, { "epoch": 7.361211247296323, "grad_norm": 0.12444949895143509, "learning_rate": 9.36405457314382e-05, "loss": 0.0421, "step": 10210 }, { "epoch": 7.368421052631579, "grad_norm": 0.1661926656961441, "learning_rate": 9.36243963874389e-05, "loss": 0.0407, "step": 10220 }, { "epoch": 7.375630857966835, "grad_norm": 0.119069904088974, "learning_rate": 9.360822796064655e-05, "loss": 0.0458, "step": 10230 }, { "epoch": 7.38284066330209, "grad_norm": 0.16213403642177582, "learning_rate": 9.359204045813372e-05, "loss": 0.0403, "step": 10240 }, { "epoch": 7.390050468637347, "grad_norm": 0.13027836382389069, "learning_rate": 9.357583388698141e-05, "loss": 0.0427, "step": 10250 }, { "epoch": 7.397260273972603, "grad_norm": 0.16820946335792542, "learning_rate": 9.35596082542789e-05, "loss": 0.0396, "step": 10260 }, { "epoch": 7.404470079307859, "grad_norm": 0.12045355886220932, "learning_rate": 9.354336356712383e-05, "loss": 0.0412, "step": 10270 }, { "epoch": 7.411679884643115, "grad_norm": 0.12366096675395966, "learning_rate": 9.35270998326222e-05, "loss": 0.0487, "step": 10280 }, { "epoch": 7.4188896899783705, "grad_norm": 0.1588553935289383, "learning_rate": 9.351081705788831e-05, "loss": 0.0453, "step": 10290 }, { "epoch": 7.426099495313626, "grad_norm": 0.10763750225305557, "learning_rate": 9.349451525004477e-05, "loss": 0.0416, "step": 10300 }, { "epoch": 7.433309300648882, "grad_norm": 0.1378103345632553, "learning_rate": 9.347819441622261e-05, "loss": 0.0378, "step": 10310 }, { "epoch": 7.440519105984138, "grad_norm": 0.15960434079170227, "learning_rate": 9.346185456356105e-05, "loss": 0.049, "step": 10320 }, { "epoch": 7.447728911319395, "grad_norm": 0.15347367525100708, "learning_rate": 9.344549569920774e-05, "loss": 0.0364, "step": 10330 }, { "epoch": 7.454938716654651, "grad_norm": 0.17124618589878082, "learning_rate": 9.342911783031858e-05, "loss": 0.043, "step": 10340 }, { "epoch": 7.462148521989906, "grad_norm": 0.12191613018512726, "learning_rate": 9.341272096405782e-05, "loss": 0.0427, "step": 10350 }, { "epoch": 7.469358327325162, "grad_norm": 0.1168908029794693, "learning_rate": 9.3396305107598e-05, "loss": 0.043, "step": 10360 }, { "epoch": 7.476568132660418, "grad_norm": 0.11451573669910431, "learning_rate": 9.337987026811998e-05, "loss": 0.0423, "step": 10370 }, { "epoch": 7.483777937995674, "grad_norm": 0.11333142966032028, "learning_rate": 9.33634164528129e-05, "loss": 0.0412, "step": 10380 }, { "epoch": 7.49098774333093, "grad_norm": 0.14112015068531036, "learning_rate": 9.334694366887424e-05, "loss": 0.0415, "step": 10390 }, { "epoch": 7.498197548666186, "grad_norm": 0.15732988715171814, "learning_rate": 9.333045192350973e-05, "loss": 0.0458, "step": 10400 }, { "epoch": 7.505407354001442, "grad_norm": 0.15126384794712067, "learning_rate": 9.331394122393345e-05, "loss": 0.0361, "step": 10410 }, { "epoch": 7.512617159336698, "grad_norm": 0.1459011286497116, "learning_rate": 9.329741157736771e-05, "loss": 0.0431, "step": 10420 }, { "epoch": 7.519826964671954, "grad_norm": 0.16095896065235138, "learning_rate": 9.328086299104317e-05, "loss": 0.0442, "step": 10430 }, { "epoch": 7.52703677000721, "grad_norm": 0.16311445832252502, "learning_rate": 9.326429547219872e-05, "loss": 0.0445, "step": 10440 }, { "epoch": 7.534246575342466, "grad_norm": 0.14654122292995453, "learning_rate": 9.324770902808155e-05, "loss": 0.0427, "step": 10450 }, { "epoch": 7.5414563806777215, "grad_norm": 0.10207711160182953, "learning_rate": 9.323110366594717e-05, "loss": 0.0404, "step": 10460 }, { "epoch": 7.548666186012977, "grad_norm": 0.1649676114320755, "learning_rate": 9.32144793930593e-05, "loss": 0.0452, "step": 10470 }, { "epoch": 7.555875991348234, "grad_norm": 0.1935712993144989, "learning_rate": 9.319783621668996e-05, "loss": 0.0434, "step": 10480 }, { "epoch": 7.56308579668349, "grad_norm": 0.1583772897720337, "learning_rate": 9.318117414411947e-05, "loss": 0.0411, "step": 10490 }, { "epoch": 7.570295602018746, "grad_norm": 0.14632175862789154, "learning_rate": 9.316449318263635e-05, "loss": 0.0425, "step": 10500 }, { "epoch": 7.577505407354002, "grad_norm": 0.1320124715566635, "learning_rate": 9.314779333953744e-05, "loss": 0.0406, "step": 10510 }, { "epoch": 7.584715212689257, "grad_norm": 0.14778223633766174, "learning_rate": 9.313107462212781e-05, "loss": 0.0506, "step": 10520 }, { "epoch": 7.591925018024513, "grad_norm": 0.15434707701206207, "learning_rate": 9.311433703772082e-05, "loss": 0.0471, "step": 10530 }, { "epoch": 7.599134823359769, "grad_norm": 0.14770811796188354, "learning_rate": 9.3097580593638e-05, "loss": 0.0427, "step": 10540 }, { "epoch": 7.606344628695025, "grad_norm": 0.0899270623922348, "learning_rate": 9.308080529720926e-05, "loss": 0.0392, "step": 10550 }, { "epoch": 7.613554434030281, "grad_norm": 0.13573044538497925, "learning_rate": 9.306401115577264e-05, "loss": 0.0427, "step": 10560 }, { "epoch": 7.6207642393655375, "grad_norm": 0.15235665440559387, "learning_rate": 9.304719817667447e-05, "loss": 0.043, "step": 10570 }, { "epoch": 7.627974044700793, "grad_norm": 0.1504439264535904, "learning_rate": 9.303036636726934e-05, "loss": 0.0394, "step": 10580 }, { "epoch": 7.635183850036049, "grad_norm": 0.1060604453086853, "learning_rate": 9.301351573492003e-05, "loss": 0.0372, "step": 10590 }, { "epoch": 7.642393655371305, "grad_norm": 0.11881281435489655, "learning_rate": 9.299664628699758e-05, "loss": 0.0414, "step": 10600 }, { "epoch": 7.649603460706561, "grad_norm": 0.13192245364189148, "learning_rate": 9.297975803088129e-05, "loss": 0.0383, "step": 10610 }, { "epoch": 7.656813266041817, "grad_norm": 0.11286875605583191, "learning_rate": 9.296285097395864e-05, "loss": 0.0426, "step": 10620 }, { "epoch": 7.6640230713770725, "grad_norm": 0.18043790757656097, "learning_rate": 9.294592512362533e-05, "loss": 0.0407, "step": 10630 }, { "epoch": 7.671232876712329, "grad_norm": 0.1499873399734497, "learning_rate": 9.292898048728533e-05, "loss": 0.0413, "step": 10640 }, { "epoch": 7.678442682047585, "grad_norm": 0.14221619069576263, "learning_rate": 9.29120170723508e-05, "loss": 0.042, "step": 10650 }, { "epoch": 7.685652487382841, "grad_norm": 0.16123975813388824, "learning_rate": 9.28950348862421e-05, "loss": 0.0422, "step": 10660 }, { "epoch": 7.692862292718097, "grad_norm": 0.16119685769081116, "learning_rate": 9.287803393638781e-05, "loss": 0.0397, "step": 10670 }, { "epoch": 7.700072098053353, "grad_norm": 0.13847385346889496, "learning_rate": 9.286101423022474e-05, "loss": 0.0371, "step": 10680 }, { "epoch": 7.707281903388608, "grad_norm": 0.16359320282936096, "learning_rate": 9.284397577519788e-05, "loss": 0.0428, "step": 10690 }, { "epoch": 7.714491708723864, "grad_norm": 0.169193834066391, "learning_rate": 9.282691857876043e-05, "loss": 0.0383, "step": 10700 }, { "epoch": 7.72170151405912, "grad_norm": 0.19114460051059723, "learning_rate": 9.280984264837377e-05, "loss": 0.0444, "step": 10710 }, { "epoch": 7.728911319394376, "grad_norm": 0.1168084591627121, "learning_rate": 9.279274799150752e-05, "loss": 0.0434, "step": 10720 }, { "epoch": 7.736121124729633, "grad_norm": 0.11302043497562408, "learning_rate": 9.277563461563945e-05, "loss": 0.0429, "step": 10730 }, { "epoch": 7.7433309300648885, "grad_norm": 0.15681877732276917, "learning_rate": 9.275850252825555e-05, "loss": 0.0412, "step": 10740 }, { "epoch": 7.750540735400144, "grad_norm": 0.19134704768657684, "learning_rate": 9.274135173684994e-05, "loss": 0.0442, "step": 10750 }, { "epoch": 7.7577505407354, "grad_norm": 0.17345267534255981, "learning_rate": 9.272418224892498e-05, "loss": 0.0414, "step": 10760 }, { "epoch": 7.764960346070656, "grad_norm": 0.19591881334781647, "learning_rate": 9.27069940719912e-05, "loss": 0.0491, "step": 10770 }, { "epoch": 7.772170151405912, "grad_norm": 0.19303886592388153, "learning_rate": 9.268978721356727e-05, "loss": 0.044, "step": 10780 }, { "epoch": 7.779379956741168, "grad_norm": 0.13605885207653046, "learning_rate": 9.267256168118008e-05, "loss": 0.0413, "step": 10790 }, { "epoch": 7.786589762076424, "grad_norm": 0.1832304149866104, "learning_rate": 9.265531748236463e-05, "loss": 0.0482, "step": 10800 }, { "epoch": 7.79379956741168, "grad_norm": 0.1320267617702484, "learning_rate": 9.263805462466416e-05, "loss": 0.0394, "step": 10810 }, { "epoch": 7.801009372746936, "grad_norm": 0.13550139963626862, "learning_rate": 9.262077311562998e-05, "loss": 0.0465, "step": 10820 }, { "epoch": 7.808219178082192, "grad_norm": 0.18262693285942078, "learning_rate": 9.260347296282165e-05, "loss": 0.041, "step": 10830 }, { "epoch": 7.815428983417448, "grad_norm": 0.20429670810699463, "learning_rate": 9.258615417380683e-05, "loss": 0.0464, "step": 10840 }, { "epoch": 7.822638788752704, "grad_norm": 0.21737445890903473, "learning_rate": 9.256881675616133e-05, "loss": 0.0423, "step": 10850 }, { "epoch": 7.829848594087959, "grad_norm": 0.1325070708990097, "learning_rate": 9.255146071746917e-05, "loss": 0.0355, "step": 10860 }, { "epoch": 7.837058399423215, "grad_norm": 0.13985352218151093, "learning_rate": 9.253408606532241e-05, "loss": 0.0464, "step": 10870 }, { "epoch": 7.844268204758471, "grad_norm": 0.10638557374477386, "learning_rate": 9.251669280732137e-05, "loss": 0.0346, "step": 10880 }, { "epoch": 7.851478010093728, "grad_norm": 0.11442389339208603, "learning_rate": 9.249928095107441e-05, "loss": 0.0419, "step": 10890 }, { "epoch": 7.858687815428984, "grad_norm": 0.12467783689498901, "learning_rate": 9.248185050419811e-05, "loss": 0.0385, "step": 10900 }, { "epoch": 7.8658976207642395, "grad_norm": 0.1694924682378769, "learning_rate": 9.24644014743171e-05, "loss": 0.0385, "step": 10910 }, { "epoch": 7.873107426099495, "grad_norm": 0.1562197357416153, "learning_rate": 9.24469338690642e-05, "loss": 0.0371, "step": 10920 }, { "epoch": 7.880317231434751, "grad_norm": 0.14651696383953094, "learning_rate": 9.242944769608033e-05, "loss": 0.0433, "step": 10930 }, { "epoch": 7.887527036770007, "grad_norm": 0.1220702975988388, "learning_rate": 9.241194296301454e-05, "loss": 0.0414, "step": 10940 }, { "epoch": 7.894736842105263, "grad_norm": 0.1672317236661911, "learning_rate": 9.239441967752397e-05, "loss": 0.043, "step": 10950 }, { "epoch": 7.9019466474405196, "grad_norm": 0.11049538850784302, "learning_rate": 9.237687784727393e-05, "loss": 0.0411, "step": 10960 }, { "epoch": 7.909156452775775, "grad_norm": 0.13826073706150055, "learning_rate": 9.235931747993781e-05, "loss": 0.0395, "step": 10970 }, { "epoch": 7.916366258111031, "grad_norm": 0.1567201316356659, "learning_rate": 9.234173858319707e-05, "loss": 0.0439, "step": 10980 }, { "epoch": 7.923576063446287, "grad_norm": 0.14734245836734772, "learning_rate": 9.23241411647414e-05, "loss": 0.039, "step": 10990 }, { "epoch": 7.930785868781543, "grad_norm": 0.16321471333503723, "learning_rate": 9.230652523226841e-05, "loss": 0.0526, "step": 11000 }, { "epoch": 7.937995674116799, "grad_norm": 0.16532255709171295, "learning_rate": 9.2288890793484e-05, "loss": 0.0437, "step": 11010 }, { "epoch": 7.945205479452055, "grad_norm": 0.17466339468955994, "learning_rate": 9.227123785610199e-05, "loss": 0.0396, "step": 11020 }, { "epoch": 7.95241528478731, "grad_norm": 0.1333620697259903, "learning_rate": 9.225356642784443e-05, "loss": 0.0439, "step": 11030 }, { "epoch": 7.959625090122566, "grad_norm": 0.2232920229434967, "learning_rate": 9.223587651644139e-05, "loss": 0.0405, "step": 11040 }, { "epoch": 7.966834895457823, "grad_norm": 0.16275806725025177, "learning_rate": 9.221816812963104e-05, "loss": 0.0371, "step": 11050 }, { "epoch": 7.974044700793079, "grad_norm": 0.12280899286270142, "learning_rate": 9.22004412751596e-05, "loss": 0.0381, "step": 11060 }, { "epoch": 7.981254506128335, "grad_norm": 0.14348645508289337, "learning_rate": 9.218269596078146e-05, "loss": 0.0428, "step": 11070 }, { "epoch": 7.9884643114635905, "grad_norm": 0.1435224562883377, "learning_rate": 9.216493219425895e-05, "loss": 0.0411, "step": 11080 }, { "epoch": 7.995674116798846, "grad_norm": 0.13754314184188843, "learning_rate": 9.21471499833626e-05, "loss": 0.0468, "step": 11090 }, { "epoch": 8.002883922134103, "grad_norm": 0.17340193688869476, "learning_rate": 9.212934933587094e-05, "loss": 0.044, "step": 11100 }, { "epoch": 8.010093727469359, "grad_norm": 0.1380314826965332, "learning_rate": 9.211153025957056e-05, "loss": 0.043, "step": 11110 }, { "epoch": 8.017303532804615, "grad_norm": 0.16779717803001404, "learning_rate": 9.209369276225614e-05, "loss": 0.0408, "step": 11120 }, { "epoch": 8.02451333813987, "grad_norm": 0.17925916612148285, "learning_rate": 9.207583685173042e-05, "loss": 0.041, "step": 11130 }, { "epoch": 8.031723143475126, "grad_norm": 0.1575891077518463, "learning_rate": 9.205796253580417e-05, "loss": 0.0396, "step": 11140 }, { "epoch": 8.038932948810382, "grad_norm": 0.16258038580417633, "learning_rate": 9.204006982229621e-05, "loss": 0.0541, "step": 11150 }, { "epoch": 8.046142754145638, "grad_norm": 0.15205633640289307, "learning_rate": 9.202215871903346e-05, "loss": 0.0423, "step": 11160 }, { "epoch": 8.053352559480894, "grad_norm": 0.15783509612083435, "learning_rate": 9.20042292338508e-05, "loss": 0.0411, "step": 11170 }, { "epoch": 8.06056236481615, "grad_norm": 0.17299921810626984, "learning_rate": 9.198628137459123e-05, "loss": 0.044, "step": 11180 }, { "epoch": 8.067772170151406, "grad_norm": 0.1325407177209854, "learning_rate": 9.196831514910572e-05, "loss": 0.0364, "step": 11190 }, { "epoch": 8.074981975486661, "grad_norm": 0.1221093162894249, "learning_rate": 9.195033056525332e-05, "loss": 0.0454, "step": 11200 }, { "epoch": 8.082191780821917, "grad_norm": 0.1577150523662567, "learning_rate": 9.193232763090114e-05, "loss": 0.0459, "step": 11210 }, { "epoch": 8.089401586157173, "grad_norm": 0.12088416516780853, "learning_rate": 9.191430635392422e-05, "loss": 0.0381, "step": 11220 }, { "epoch": 8.096611391492429, "grad_norm": 0.14987614750862122, "learning_rate": 9.18962667422057e-05, "loss": 0.0411, "step": 11230 }, { "epoch": 8.103821196827685, "grad_norm": 0.13551490008831024, "learning_rate": 9.187820880363671e-05, "loss": 0.0489, "step": 11240 }, { "epoch": 8.111031002162942, "grad_norm": 0.22424893081188202, "learning_rate": 9.186013254611643e-05, "loss": 0.0467, "step": 11250 }, { "epoch": 8.118240807498198, "grad_norm": 0.2214055359363556, "learning_rate": 9.1842037977552e-05, "loss": 0.0469, "step": 11260 }, { "epoch": 8.125450612833454, "grad_norm": 0.1227397546172142, "learning_rate": 9.182392510585862e-05, "loss": 0.0367, "step": 11270 }, { "epoch": 8.13266041816871, "grad_norm": 0.15552663803100586, "learning_rate": 9.180579393895946e-05, "loss": 0.0399, "step": 11280 }, { "epoch": 8.139870223503966, "grad_norm": 0.1486702710390091, "learning_rate": 9.178764448478572e-05, "loss": 0.0399, "step": 11290 }, { "epoch": 8.147080028839222, "grad_norm": 0.1748848557472229, "learning_rate": 9.176947675127658e-05, "loss": 0.0474, "step": 11300 }, { "epoch": 8.154289834174477, "grad_norm": 0.13384518027305603, "learning_rate": 9.175129074637924e-05, "loss": 0.0405, "step": 11310 }, { "epoch": 8.161499639509733, "grad_norm": 0.15412192046642303, "learning_rate": 9.173308647804884e-05, "loss": 0.0428, "step": 11320 }, { "epoch": 8.168709444844989, "grad_norm": 0.17233960330486298, "learning_rate": 9.171486395424859e-05, "loss": 0.0405, "step": 11330 }, { "epoch": 8.175919250180245, "grad_norm": 0.14915750920772552, "learning_rate": 9.16966231829496e-05, "loss": 0.0406, "step": 11340 }, { "epoch": 8.1831290555155, "grad_norm": 0.17488856613636017, "learning_rate": 9.167836417213105e-05, "loss": 0.0445, "step": 11350 }, { "epoch": 8.190338860850757, "grad_norm": 0.10375533998012543, "learning_rate": 9.166008692978001e-05, "loss": 0.0421, "step": 11360 }, { "epoch": 8.197548666186012, "grad_norm": 0.1357211321592331, "learning_rate": 9.164179146389158e-05, "loss": 0.0437, "step": 11370 }, { "epoch": 8.204758471521268, "grad_norm": 0.1940808892250061, "learning_rate": 9.162347778246882e-05, "loss": 0.0428, "step": 11380 }, { "epoch": 8.211968276856524, "grad_norm": 0.13702423870563507, "learning_rate": 9.160514589352276e-05, "loss": 0.0318, "step": 11390 }, { "epoch": 8.219178082191782, "grad_norm": 0.1410423070192337, "learning_rate": 9.15867958050724e-05, "loss": 0.0357, "step": 11400 }, { "epoch": 8.226387887527038, "grad_norm": 0.1819317489862442, "learning_rate": 9.156842752514466e-05, "loss": 0.0385, "step": 11410 }, { "epoch": 8.233597692862293, "grad_norm": 0.18561819195747375, "learning_rate": 9.155004106177447e-05, "loss": 0.0385, "step": 11420 }, { "epoch": 8.24080749819755, "grad_norm": 0.16018345952033997, "learning_rate": 9.153163642300471e-05, "loss": 0.0455, "step": 11430 }, { "epoch": 8.248017303532805, "grad_norm": 0.17529207468032837, "learning_rate": 9.151321361688616e-05, "loss": 0.0414, "step": 11440 }, { "epoch": 8.25522710886806, "grad_norm": 0.1309075802564621, "learning_rate": 9.149477265147762e-05, "loss": 0.0393, "step": 11450 }, { "epoch": 8.262436914203317, "grad_norm": 0.10469596832990646, "learning_rate": 9.147631353484574e-05, "loss": 0.0411, "step": 11460 }, { "epoch": 8.269646719538573, "grad_norm": 0.17111937701702118, "learning_rate": 9.145783627506522e-05, "loss": 0.0536, "step": 11470 }, { "epoch": 8.276856524873828, "grad_norm": 0.12100786715745926, "learning_rate": 9.143934088021861e-05, "loss": 0.0426, "step": 11480 }, { "epoch": 8.284066330209084, "grad_norm": 0.133721262216568, "learning_rate": 9.142082735839645e-05, "loss": 0.0396, "step": 11490 }, { "epoch": 8.29127613554434, "grad_norm": 0.12560345232486725, "learning_rate": 9.140229571769715e-05, "loss": 0.0391, "step": 11500 }, { "epoch": 8.298485940879596, "grad_norm": 0.1493663638830185, "learning_rate": 9.138374596622709e-05, "loss": 0.0441, "step": 11510 }, { "epoch": 8.305695746214852, "grad_norm": 0.1571972370147705, "learning_rate": 9.136517811210059e-05, "loss": 0.0392, "step": 11520 }, { "epoch": 8.312905551550108, "grad_norm": 0.10701464861631393, "learning_rate": 9.134659216343984e-05, "loss": 0.0432, "step": 11530 }, { "epoch": 8.320115356885363, "grad_norm": 0.12584038078784943, "learning_rate": 9.132798812837494e-05, "loss": 0.0414, "step": 11540 }, { "epoch": 8.32732516222062, "grad_norm": 0.15766450762748718, "learning_rate": 9.130936601504396e-05, "loss": 0.0424, "step": 11550 }, { "epoch": 8.334534967555875, "grad_norm": 0.16506145894527435, "learning_rate": 9.129072583159284e-05, "loss": 0.0384, "step": 11560 }, { "epoch": 8.341744772891133, "grad_norm": 0.16921906173229218, "learning_rate": 9.127206758617542e-05, "loss": 0.0469, "step": 11570 }, { "epoch": 8.348954578226389, "grad_norm": 0.1439620554447174, "learning_rate": 9.125339128695346e-05, "loss": 0.037, "step": 11580 }, { "epoch": 8.356164383561644, "grad_norm": 0.1646518111228943, "learning_rate": 9.123469694209659e-05, "loss": 0.043, "step": 11590 }, { "epoch": 8.3633741888969, "grad_norm": 0.14541037380695343, "learning_rate": 9.121598455978239e-05, "loss": 0.0444, "step": 11600 }, { "epoch": 8.370583994232156, "grad_norm": 0.19693630933761597, "learning_rate": 9.119725414819624e-05, "loss": 0.0423, "step": 11610 }, { "epoch": 8.377793799567412, "grad_norm": 0.11040257662534714, "learning_rate": 9.117850571553149e-05, "loss": 0.0378, "step": 11620 }, { "epoch": 8.385003604902668, "grad_norm": 0.13731496036052704, "learning_rate": 9.115973926998935e-05, "loss": 0.044, "step": 11630 }, { "epoch": 8.392213410237924, "grad_norm": 0.1291133165359497, "learning_rate": 9.114095481977888e-05, "loss": 0.0487, "step": 11640 }, { "epoch": 8.39942321557318, "grad_norm": 0.13641898334026337, "learning_rate": 9.112215237311703e-05, "loss": 0.0375, "step": 11650 }, { "epoch": 8.406633020908435, "grad_norm": 0.110873743891716, "learning_rate": 9.110333193822867e-05, "loss": 0.0361, "step": 11660 }, { "epoch": 8.413842826243691, "grad_norm": 0.13758736848831177, "learning_rate": 9.108449352334645e-05, "loss": 0.037, "step": 11670 }, { "epoch": 8.421052631578947, "grad_norm": 0.16533920168876648, "learning_rate": 9.106563713671094e-05, "loss": 0.0398, "step": 11680 }, { "epoch": 8.428262436914203, "grad_norm": 0.11488465219736099, "learning_rate": 9.104676278657061e-05, "loss": 0.0349, "step": 11690 }, { "epoch": 8.435472242249459, "grad_norm": 0.10235647112131119, "learning_rate": 9.102787048118169e-05, "loss": 0.0389, "step": 11700 }, { "epoch": 8.442682047584714, "grad_norm": 0.1512441486120224, "learning_rate": 9.100896022880834e-05, "loss": 0.0498, "step": 11710 }, { "epoch": 8.449891852919972, "grad_norm": 0.20564602315425873, "learning_rate": 9.099003203772254e-05, "loss": 0.0368, "step": 11720 }, { "epoch": 8.457101658255228, "grad_norm": 0.1317523568868637, "learning_rate": 9.097108591620413e-05, "loss": 0.0338, "step": 11730 }, { "epoch": 8.464311463590484, "grad_norm": 0.15202833712100983, "learning_rate": 9.095212187254078e-05, "loss": 0.0372, "step": 11740 }, { "epoch": 8.47152126892574, "grad_norm": 0.1638357937335968, "learning_rate": 9.093313991502801e-05, "loss": 0.0382, "step": 11750 }, { "epoch": 8.478731074260995, "grad_norm": 0.15023648738861084, "learning_rate": 9.091414005196917e-05, "loss": 0.0386, "step": 11760 }, { "epoch": 8.485940879596251, "grad_norm": 0.1605110615491867, "learning_rate": 9.089512229167545e-05, "loss": 0.0414, "step": 11770 }, { "epoch": 8.493150684931507, "grad_norm": 0.19907338917255402, "learning_rate": 9.087608664246587e-05, "loss": 0.0413, "step": 11780 }, { "epoch": 8.500360490266763, "grad_norm": 0.15798743069171906, "learning_rate": 9.085703311266727e-05, "loss": 0.0388, "step": 11790 }, { "epoch": 8.507570295602019, "grad_norm": 0.10636866837739944, "learning_rate": 9.083796171061429e-05, "loss": 0.0435, "step": 11800 }, { "epoch": 8.514780100937275, "grad_norm": 0.1359826773405075, "learning_rate": 9.081887244464941e-05, "loss": 0.0377, "step": 11810 }, { "epoch": 8.52198990627253, "grad_norm": 0.1796482354402542, "learning_rate": 9.079976532312297e-05, "loss": 0.0431, "step": 11820 }, { "epoch": 8.529199711607786, "grad_norm": 0.14723339676856995, "learning_rate": 9.078064035439301e-05, "loss": 0.0412, "step": 11830 }, { "epoch": 8.536409516943042, "grad_norm": 0.12584614753723145, "learning_rate": 9.07614975468255e-05, "loss": 0.036, "step": 11840 }, { "epoch": 8.543619322278298, "grad_norm": 0.1451566070318222, "learning_rate": 9.074233690879412e-05, "loss": 0.0381, "step": 11850 }, { "epoch": 8.550829127613554, "grad_norm": 0.1680208444595337, "learning_rate": 9.072315844868038e-05, "loss": 0.04, "step": 11860 }, { "epoch": 8.55803893294881, "grad_norm": 0.16907846927642822, "learning_rate": 9.07039621748736e-05, "loss": 0.0465, "step": 11870 }, { "epoch": 8.565248738284065, "grad_norm": 0.19164422154426575, "learning_rate": 9.06847480957709e-05, "loss": 0.0469, "step": 11880 }, { "epoch": 8.572458543619323, "grad_norm": 0.16205434501171112, "learning_rate": 9.066551621977713e-05, "loss": 0.0394, "step": 11890 }, { "epoch": 8.579668348954579, "grad_norm": 0.15740415453910828, "learning_rate": 9.064626655530501e-05, "loss": 0.0466, "step": 11900 }, { "epoch": 8.586878154289835, "grad_norm": 0.23011450469493866, "learning_rate": 9.062699911077497e-05, "loss": 0.0438, "step": 11910 }, { "epoch": 8.59408795962509, "grad_norm": 0.17666277289390564, "learning_rate": 9.060771389461524e-05, "loss": 0.0451, "step": 11920 }, { "epoch": 8.601297764960346, "grad_norm": 0.16214799880981445, "learning_rate": 9.058841091526187e-05, "loss": 0.0395, "step": 11930 }, { "epoch": 8.608507570295602, "grad_norm": 0.1739824265241623, "learning_rate": 9.056909018115858e-05, "loss": 0.0428, "step": 11940 }, { "epoch": 8.615717375630858, "grad_norm": 0.19079336524009705, "learning_rate": 9.054975170075697e-05, "loss": 0.0449, "step": 11950 }, { "epoch": 8.622927180966114, "grad_norm": 0.14164060354232788, "learning_rate": 9.053039548251631e-05, "loss": 0.0344, "step": 11960 }, { "epoch": 8.63013698630137, "grad_norm": 0.13228294253349304, "learning_rate": 9.051102153490368e-05, "loss": 0.0472, "step": 11970 }, { "epoch": 8.637346791636626, "grad_norm": 0.13371320068836212, "learning_rate": 9.04916298663939e-05, "loss": 0.0388, "step": 11980 }, { "epoch": 8.644556596971881, "grad_norm": 0.10652283579111099, "learning_rate": 9.047222048546955e-05, "loss": 0.0375, "step": 11990 }, { "epoch": 8.651766402307137, "grad_norm": 0.21955327689647675, "learning_rate": 9.045279340062097e-05, "loss": 0.0381, "step": 12000 }, { "epoch": 8.658976207642393, "grad_norm": 0.15618158876895905, "learning_rate": 9.043334862034618e-05, "loss": 0.0429, "step": 12010 }, { "epoch": 8.666186012977649, "grad_norm": 0.12448591738939285, "learning_rate": 9.041388615315102e-05, "loss": 0.0385, "step": 12020 }, { "epoch": 8.673395818312905, "grad_norm": 0.1702316552400589, "learning_rate": 9.039440600754905e-05, "loss": 0.0451, "step": 12030 }, { "epoch": 8.680605623648162, "grad_norm": 0.19358186423778534, "learning_rate": 9.037490819206151e-05, "loss": 0.0416, "step": 12040 }, { "epoch": 8.687815428983418, "grad_norm": 0.19677259027957916, "learning_rate": 9.035539271521744e-05, "loss": 0.045, "step": 12050 }, { "epoch": 8.695025234318674, "grad_norm": 0.13242150843143463, "learning_rate": 9.033585958555356e-05, "loss": 0.0393, "step": 12060 }, { "epoch": 8.70223503965393, "grad_norm": 0.13787172734737396, "learning_rate": 9.031630881161431e-05, "loss": 0.0482, "step": 12070 }, { "epoch": 8.709444844989186, "grad_norm": 0.1482103168964386, "learning_rate": 9.029674040195186e-05, "loss": 0.0382, "step": 12080 }, { "epoch": 8.716654650324442, "grad_norm": 0.1990579515695572, "learning_rate": 9.027715436512613e-05, "loss": 0.0442, "step": 12090 }, { "epoch": 8.723864455659697, "grad_norm": 0.2010902762413025, "learning_rate": 9.02575507097047e-05, "loss": 0.0407, "step": 12100 }, { "epoch": 8.731074260994953, "grad_norm": 0.16414174437522888, "learning_rate": 9.023792944426286e-05, "loss": 0.039, "step": 12110 }, { "epoch": 8.738284066330209, "grad_norm": 0.1520642340183258, "learning_rate": 9.021829057738364e-05, "loss": 0.0432, "step": 12120 }, { "epoch": 8.745493871665465, "grad_norm": 0.13210882246494293, "learning_rate": 9.019863411765775e-05, "loss": 0.0394, "step": 12130 }, { "epoch": 8.75270367700072, "grad_norm": 0.14770592749118805, "learning_rate": 9.017896007368357e-05, "loss": 0.0417, "step": 12140 }, { "epoch": 8.759913482335977, "grad_norm": 0.1470566987991333, "learning_rate": 9.015926845406722e-05, "loss": 0.0423, "step": 12150 }, { "epoch": 8.767123287671232, "grad_norm": 0.12920226156711578, "learning_rate": 9.013955926742245e-05, "loss": 0.0442, "step": 12160 }, { "epoch": 8.774333093006488, "grad_norm": 0.1380981057882309, "learning_rate": 9.011983252237077e-05, "loss": 0.0338, "step": 12170 }, { "epoch": 8.781542898341744, "grad_norm": 0.11730717122554779, "learning_rate": 9.01000882275413e-05, "loss": 0.0405, "step": 12180 }, { "epoch": 8.788752703677, "grad_norm": 0.15444527566432953, "learning_rate": 9.008032639157088e-05, "loss": 0.0429, "step": 12190 }, { "epoch": 8.795962509012256, "grad_norm": 0.19882801175117493, "learning_rate": 9.006054702310401e-05, "loss": 0.0398, "step": 12200 }, { "epoch": 8.803172314347513, "grad_norm": 0.1746983677148819, "learning_rate": 9.004075013079283e-05, "loss": 0.0392, "step": 12210 }, { "epoch": 8.81038211968277, "grad_norm": 0.14175128936767578, "learning_rate": 9.00209357232972e-05, "loss": 0.0407, "step": 12220 }, { "epoch": 8.817591925018025, "grad_norm": 0.14078709483146667, "learning_rate": 9.000110380928461e-05, "loss": 0.0407, "step": 12230 }, { "epoch": 8.82480173035328, "grad_norm": 0.12269389629364014, "learning_rate": 8.998125439743021e-05, "loss": 0.0427, "step": 12240 }, { "epoch": 8.832011535688537, "grad_norm": 0.15112905204296112, "learning_rate": 8.996138749641682e-05, "loss": 0.0402, "step": 12250 }, { "epoch": 8.839221341023793, "grad_norm": 0.274795264005661, "learning_rate": 8.994150311493488e-05, "loss": 0.0439, "step": 12260 }, { "epoch": 8.846431146359048, "grad_norm": 0.15981154143810272, "learning_rate": 8.992160126168247e-05, "loss": 0.0387, "step": 12270 }, { "epoch": 8.853640951694304, "grad_norm": 0.1612047255039215, "learning_rate": 8.99016819453654e-05, "loss": 0.0432, "step": 12280 }, { "epoch": 8.86085075702956, "grad_norm": 0.14884015917778015, "learning_rate": 8.988174517469702e-05, "loss": 0.0427, "step": 12290 }, { "epoch": 8.868060562364816, "grad_norm": 0.13047946989536285, "learning_rate": 8.986179095839835e-05, "loss": 0.0376, "step": 12300 }, { "epoch": 8.875270367700072, "grad_norm": 0.18515831232070923, "learning_rate": 8.984181930519804e-05, "loss": 0.0394, "step": 12310 }, { "epoch": 8.882480173035328, "grad_norm": 0.1466568559408188, "learning_rate": 8.982183022383237e-05, "loss": 0.0391, "step": 12320 }, { "epoch": 8.889689978370583, "grad_norm": 0.15267585217952728, "learning_rate": 8.980182372304525e-05, "loss": 0.0381, "step": 12330 }, { "epoch": 8.89689978370584, "grad_norm": 0.1615409255027771, "learning_rate": 8.97817998115882e-05, "loss": 0.0421, "step": 12340 }, { "epoch": 8.904109589041095, "grad_norm": 0.11830952763557434, "learning_rate": 8.976175849822038e-05, "loss": 0.0379, "step": 12350 }, { "epoch": 8.911319394376353, "grad_norm": 0.1374216228723526, "learning_rate": 8.97416997917085e-05, "loss": 0.0391, "step": 12360 }, { "epoch": 8.918529199711609, "grad_norm": 0.16502855718135834, "learning_rate": 8.972162370082695e-05, "loss": 0.0376, "step": 12370 }, { "epoch": 8.925739005046864, "grad_norm": 0.15523836016654968, "learning_rate": 8.97015302343577e-05, "loss": 0.0415, "step": 12380 }, { "epoch": 8.93294881038212, "grad_norm": 0.15073782205581665, "learning_rate": 8.968141940109027e-05, "loss": 0.0437, "step": 12390 }, { "epoch": 8.940158615717376, "grad_norm": 0.10537776350975037, "learning_rate": 8.966129120982188e-05, "loss": 0.0404, "step": 12400 }, { "epoch": 8.947368421052632, "grad_norm": 0.15154288709163666, "learning_rate": 8.964114566935724e-05, "loss": 0.0447, "step": 12410 }, { "epoch": 8.954578226387888, "grad_norm": 0.14509274065494537, "learning_rate": 8.962098278850871e-05, "loss": 0.0393, "step": 12420 }, { "epoch": 8.961788031723144, "grad_norm": 0.09806293994188309, "learning_rate": 8.960080257609622e-05, "loss": 0.0417, "step": 12430 }, { "epoch": 8.9689978370584, "grad_norm": 0.12364383041858673, "learning_rate": 8.95806050409473e-05, "loss": 0.04, "step": 12440 }, { "epoch": 8.976207642393655, "grad_norm": 0.13639219105243683, "learning_rate": 8.9560390191897e-05, "loss": 0.0403, "step": 12450 }, { "epoch": 8.983417447728911, "grad_norm": 0.1455855369567871, "learning_rate": 8.9540158037788e-05, "loss": 0.0389, "step": 12460 }, { "epoch": 8.990627253064167, "grad_norm": 0.15429459512233734, "learning_rate": 8.951990858747054e-05, "loss": 0.0402, "step": 12470 }, { "epoch": 8.997837058399423, "grad_norm": 0.14744900166988373, "learning_rate": 8.94996418498024e-05, "loss": 0.0398, "step": 12480 }, { "epoch": 9.005046863734679, "grad_norm": 0.1545485109090805, "learning_rate": 8.947935783364896e-05, "loss": 0.0373, "step": 12490 }, { "epoch": 9.012256669069934, "grad_norm": 0.18219012022018433, "learning_rate": 8.945905654788311e-05, "loss": 0.0397, "step": 12500 }, { "epoch": 9.01946647440519, "grad_norm": 0.18780073523521423, "learning_rate": 8.943873800138535e-05, "loss": 0.0368, "step": 12510 }, { "epoch": 9.026676279740448, "grad_norm": 0.16984772682189941, "learning_rate": 8.94184022030437e-05, "loss": 0.0375, "step": 12520 }, { "epoch": 9.033886085075704, "grad_norm": 0.08630689233541489, "learning_rate": 8.939804916175372e-05, "loss": 0.0312, "step": 12530 }, { "epoch": 9.04109589041096, "grad_norm": 0.11705980449914932, "learning_rate": 8.93776788864185e-05, "loss": 0.0366, "step": 12540 }, { "epoch": 9.048305695746215, "grad_norm": 0.15599580109119415, "learning_rate": 8.935729138594873e-05, "loss": 0.0377, "step": 12550 }, { "epoch": 9.055515501081471, "grad_norm": 0.14320024847984314, "learning_rate": 8.933688666926258e-05, "loss": 0.0403, "step": 12560 }, { "epoch": 9.062725306416727, "grad_norm": 0.12159954011440277, "learning_rate": 8.931646474528575e-05, "loss": 0.0362, "step": 12570 }, { "epoch": 9.069935111751983, "grad_norm": 0.15486128628253937, "learning_rate": 8.929602562295151e-05, "loss": 0.0481, "step": 12580 }, { "epoch": 9.077144917087239, "grad_norm": 0.11853326112031937, "learning_rate": 8.92755693112006e-05, "loss": 0.0385, "step": 12590 }, { "epoch": 9.084354722422495, "grad_norm": 0.14583568274974823, "learning_rate": 8.925509581898136e-05, "loss": 0.0374, "step": 12600 }, { "epoch": 9.09156452775775, "grad_norm": 0.11259184032678604, "learning_rate": 8.923460515524951e-05, "loss": 0.0385, "step": 12610 }, { "epoch": 9.098774333093006, "grad_norm": 0.1243966594338417, "learning_rate": 8.921409732896842e-05, "loss": 0.038, "step": 12620 }, { "epoch": 9.105984138428262, "grad_norm": 0.12035287171602249, "learning_rate": 8.919357234910887e-05, "loss": 0.0353, "step": 12630 }, { "epoch": 9.113193943763518, "grad_norm": 0.12951473891735077, "learning_rate": 8.917303022464923e-05, "loss": 0.0391, "step": 12640 }, { "epoch": 9.120403749098774, "grad_norm": 0.17548640072345734, "learning_rate": 8.915247096457531e-05, "loss": 0.0422, "step": 12650 }, { "epoch": 9.12761355443403, "grad_norm": 0.1842692643404007, "learning_rate": 8.91318945778804e-05, "loss": 0.0381, "step": 12660 }, { "epoch": 9.134823359769285, "grad_norm": 0.14171834290027618, "learning_rate": 8.911130107356534e-05, "loss": 0.0374, "step": 12670 }, { "epoch": 9.142033165104543, "grad_norm": 0.14315983653068542, "learning_rate": 8.90906904606384e-05, "loss": 0.0409, "step": 12680 }, { "epoch": 9.149242970439799, "grad_norm": 0.19885583221912384, "learning_rate": 8.90700627481154e-05, "loss": 0.0369, "step": 12690 }, { "epoch": 9.156452775775055, "grad_norm": 0.15828077495098114, "learning_rate": 8.904941794501957e-05, "loss": 0.046, "step": 12700 }, { "epoch": 9.16366258111031, "grad_norm": 0.1341872662305832, "learning_rate": 8.902875606038166e-05, "loss": 0.0392, "step": 12710 }, { "epoch": 9.170872386445566, "grad_norm": 0.20623046159744263, "learning_rate": 8.900807710323989e-05, "loss": 0.0404, "step": 12720 }, { "epoch": 9.178082191780822, "grad_norm": 0.15438081324100494, "learning_rate": 8.898738108263993e-05, "loss": 0.0392, "step": 12730 }, { "epoch": 9.185291997116078, "grad_norm": 0.1504364311695099, "learning_rate": 8.896666800763491e-05, "loss": 0.037, "step": 12740 }, { "epoch": 9.192501802451334, "grad_norm": 0.1437775194644928, "learning_rate": 8.894593788728546e-05, "loss": 0.0448, "step": 12750 }, { "epoch": 9.19971160778659, "grad_norm": 0.131052628159523, "learning_rate": 8.892519073065961e-05, "loss": 0.0407, "step": 12760 }, { "epoch": 9.206921413121846, "grad_norm": 0.16641941666603088, "learning_rate": 8.89044265468329e-05, "loss": 0.0395, "step": 12770 }, { "epoch": 9.214131218457101, "grad_norm": 0.17286346852779388, "learning_rate": 8.888364534488827e-05, "loss": 0.0366, "step": 12780 }, { "epoch": 9.221341023792357, "grad_norm": 0.1749536544084549, "learning_rate": 8.886284713391613e-05, "loss": 0.0372, "step": 12790 }, { "epoch": 9.228550829127613, "grad_norm": 0.12316830456256866, "learning_rate": 8.884203192301431e-05, "loss": 0.0446, "step": 12800 }, { "epoch": 9.235760634462869, "grad_norm": 0.16257219016551971, "learning_rate": 8.88211997212881e-05, "loss": 0.0423, "step": 12810 }, { "epoch": 9.242970439798125, "grad_norm": 0.11240643262863159, "learning_rate": 8.880035053785023e-05, "loss": 0.04, "step": 12820 }, { "epoch": 9.25018024513338, "grad_norm": 0.13183435797691345, "learning_rate": 8.877948438182083e-05, "loss": 0.0396, "step": 12830 }, { "epoch": 9.257390050468638, "grad_norm": 0.15821562707424164, "learning_rate": 8.875860126232745e-05, "loss": 0.0479, "step": 12840 }, { "epoch": 9.264599855803894, "grad_norm": 0.12304770946502686, "learning_rate": 8.87377011885051e-05, "loss": 0.0393, "step": 12850 }, { "epoch": 9.27180966113915, "grad_norm": 0.13865762948989868, "learning_rate": 8.871678416949617e-05, "loss": 0.0372, "step": 12860 }, { "epoch": 9.279019466474406, "grad_norm": 0.13057959079742432, "learning_rate": 8.869585021445046e-05, "loss": 0.0365, "step": 12870 }, { "epoch": 9.286229271809662, "grad_norm": 0.10571445524692535, "learning_rate": 8.867489933252521e-05, "loss": 0.0382, "step": 12880 }, { "epoch": 9.293439077144917, "grad_norm": 0.13674010336399078, "learning_rate": 8.865393153288504e-05, "loss": 0.0391, "step": 12890 }, { "epoch": 9.300648882480173, "grad_norm": 0.1600736379623413, "learning_rate": 8.8632946824702e-05, "loss": 0.0386, "step": 12900 }, { "epoch": 9.307858687815429, "grad_norm": 0.20721079409122467, "learning_rate": 8.86119452171555e-05, "loss": 0.0371, "step": 12910 }, { "epoch": 9.315068493150685, "grad_norm": 0.10678092390298843, "learning_rate": 8.859092671943234e-05, "loss": 0.0382, "step": 12920 }, { "epoch": 9.32227829848594, "grad_norm": 0.14070959389209747, "learning_rate": 8.856989134072676e-05, "loss": 0.0446, "step": 12930 }, { "epoch": 9.329488103821197, "grad_norm": 0.12288985401391983, "learning_rate": 8.85488390902403e-05, "loss": 0.0367, "step": 12940 }, { "epoch": 9.336697909156452, "grad_norm": 0.16696126759052277, "learning_rate": 8.852776997718199e-05, "loss": 0.0417, "step": 12950 }, { "epoch": 9.343907714491708, "grad_norm": 0.12119404971599579, "learning_rate": 8.850668401076812e-05, "loss": 0.0393, "step": 12960 }, { "epoch": 9.351117519826964, "grad_norm": 0.14438800513744354, "learning_rate": 8.848558120022246e-05, "loss": 0.0377, "step": 12970 }, { "epoch": 9.35832732516222, "grad_norm": 0.1312090903520584, "learning_rate": 8.846446155477603e-05, "loss": 0.0424, "step": 12980 }, { "epoch": 9.365537130497476, "grad_norm": 0.12474022060632706, "learning_rate": 8.844332508366735e-05, "loss": 0.0452, "step": 12990 }, { "epoch": 9.372746935832733, "grad_norm": 0.1423903852701187, "learning_rate": 8.84221717961422e-05, "loss": 0.04, "step": 13000 }, { "epoch": 9.37995674116799, "grad_norm": 0.12799759209156036, "learning_rate": 8.840100170145374e-05, "loss": 0.0484, "step": 13010 }, { "epoch": 9.387166546503245, "grad_norm": 0.15476751327514648, "learning_rate": 8.837981480886249e-05, "loss": 0.0396, "step": 13020 }, { "epoch": 9.3943763518385, "grad_norm": 0.20030458271503448, "learning_rate": 8.835861112763633e-05, "loss": 0.0489, "step": 13030 }, { "epoch": 9.401586157173757, "grad_norm": 0.18662169575691223, "learning_rate": 8.833739066705044e-05, "loss": 0.0428, "step": 13040 }, { "epoch": 9.408795962509013, "grad_norm": 0.15615229308605194, "learning_rate": 8.831615343638742e-05, "loss": 0.037, "step": 13050 }, { "epoch": 9.416005767844268, "grad_norm": 0.1602858603000641, "learning_rate": 8.829489944493711e-05, "loss": 0.0433, "step": 13060 }, { "epoch": 9.423215573179524, "grad_norm": 0.1658683568239212, "learning_rate": 8.827362870199675e-05, "loss": 0.0389, "step": 13070 }, { "epoch": 9.43042537851478, "grad_norm": 0.1518155187368393, "learning_rate": 8.825234121687089e-05, "loss": 0.0363, "step": 13080 }, { "epoch": 9.437635183850036, "grad_norm": 0.16767354309558868, "learning_rate": 8.823103699887139e-05, "loss": 0.0422, "step": 13090 }, { "epoch": 9.444844989185292, "grad_norm": 0.1629917025566101, "learning_rate": 8.820971605731745e-05, "loss": 0.0366, "step": 13100 }, { "epoch": 9.452054794520548, "grad_norm": 0.1390143185853958, "learning_rate": 8.818837840153556e-05, "loss": 0.0451, "step": 13110 }, { "epoch": 9.459264599855803, "grad_norm": 0.17891009151935577, "learning_rate": 8.816702404085952e-05, "loss": 0.0403, "step": 13120 }, { "epoch": 9.46647440519106, "grad_norm": 0.15166087448596954, "learning_rate": 8.814565298463048e-05, "loss": 0.0427, "step": 13130 }, { "epoch": 9.473684210526315, "grad_norm": 0.1423913836479187, "learning_rate": 8.812426524219688e-05, "loss": 0.0376, "step": 13140 }, { "epoch": 9.48089401586157, "grad_norm": 0.1364901065826416, "learning_rate": 8.81028608229144e-05, "loss": 0.0382, "step": 13150 }, { "epoch": 9.488103821196828, "grad_norm": 0.1445242017507553, "learning_rate": 8.808143973614611e-05, "loss": 0.0345, "step": 13160 }, { "epoch": 9.495313626532084, "grad_norm": 0.1285402625799179, "learning_rate": 8.806000199126228e-05, "loss": 0.0359, "step": 13170 }, { "epoch": 9.50252343186734, "grad_norm": 0.17515221238136292, "learning_rate": 8.803854759764052e-05, "loss": 0.0417, "step": 13180 }, { "epoch": 9.509733237202596, "grad_norm": 0.15479913353919983, "learning_rate": 8.801707656466572e-05, "loss": 0.0322, "step": 13190 }, { "epoch": 9.516943042537852, "grad_norm": 0.12466590851545334, "learning_rate": 8.799558890173003e-05, "loss": 0.036, "step": 13200 }, { "epoch": 9.524152847873108, "grad_norm": 0.1537778675556183, "learning_rate": 8.79740846182329e-05, "loss": 0.0438, "step": 13210 }, { "epoch": 9.531362653208364, "grad_norm": 0.19544406235218048, "learning_rate": 8.7952563723581e-05, "loss": 0.0416, "step": 13220 }, { "epoch": 9.53857245854362, "grad_norm": 0.1306866705417633, "learning_rate": 8.793102622718834e-05, "loss": 0.0412, "step": 13230 }, { "epoch": 9.545782263878875, "grad_norm": 0.17395326495170593, "learning_rate": 8.790947213847613e-05, "loss": 0.041, "step": 13240 }, { "epoch": 9.552992069214131, "grad_norm": 0.14327862858772278, "learning_rate": 8.788790146687286e-05, "loss": 0.0409, "step": 13250 }, { "epoch": 9.560201874549387, "grad_norm": 0.2624029219150543, "learning_rate": 8.786631422181429e-05, "loss": 0.0416, "step": 13260 }, { "epoch": 9.567411679884643, "grad_norm": 0.12188396602869034, "learning_rate": 8.78447104127434e-05, "loss": 0.0405, "step": 13270 }, { "epoch": 9.574621485219899, "grad_norm": 0.12201745063066483, "learning_rate": 8.782309004911042e-05, "loss": 0.0467, "step": 13280 }, { "epoch": 9.581831290555154, "grad_norm": 0.116396963596344, "learning_rate": 8.780145314037286e-05, "loss": 0.0365, "step": 13290 }, { "epoch": 9.58904109589041, "grad_norm": 0.13820314407348633, "learning_rate": 8.777979969599542e-05, "loss": 0.0458, "step": 13300 }, { "epoch": 9.596250901225666, "grad_norm": 0.11993037164211273, "learning_rate": 8.775812972545006e-05, "loss": 0.04, "step": 13310 }, { "epoch": 9.603460706560924, "grad_norm": 0.15317583084106445, "learning_rate": 8.773644323821596e-05, "loss": 0.0365, "step": 13320 }, { "epoch": 9.61067051189618, "grad_norm": 0.13723187148571014, "learning_rate": 8.771474024377953e-05, "loss": 0.0387, "step": 13330 }, { "epoch": 9.617880317231435, "grad_norm": 0.11769822984933853, "learning_rate": 8.769302075163438e-05, "loss": 0.0415, "step": 13340 }, { "epoch": 9.625090122566691, "grad_norm": 0.16058826446533203, "learning_rate": 8.767128477128137e-05, "loss": 0.0405, "step": 13350 }, { "epoch": 9.632299927901947, "grad_norm": 0.11829360574483871, "learning_rate": 8.764953231222854e-05, "loss": 0.0375, "step": 13360 }, { "epoch": 9.639509733237203, "grad_norm": 0.11676555871963501, "learning_rate": 8.762776338399119e-05, "loss": 0.0358, "step": 13370 }, { "epoch": 9.646719538572459, "grad_norm": 0.13704657554626465, "learning_rate": 8.760597799609176e-05, "loss": 0.0368, "step": 13380 }, { "epoch": 9.653929343907715, "grad_norm": 0.12191376090049744, "learning_rate": 8.758417615805992e-05, "loss": 0.0407, "step": 13390 }, { "epoch": 9.66113914924297, "grad_norm": 0.10687726736068726, "learning_rate": 8.756235787943254e-05, "loss": 0.0351, "step": 13400 }, { "epoch": 9.668348954578226, "grad_norm": 0.13874348998069763, "learning_rate": 8.754052316975367e-05, "loss": 0.0361, "step": 13410 }, { "epoch": 9.675558759913482, "grad_norm": 0.12948444485664368, "learning_rate": 8.751867203857455e-05, "loss": 0.0422, "step": 13420 }, { "epoch": 9.682768565248738, "grad_norm": 0.14608325064182281, "learning_rate": 8.749680449545363e-05, "loss": 0.0387, "step": 13430 }, { "epoch": 9.689978370583994, "grad_norm": 0.14064916968345642, "learning_rate": 8.747492054995649e-05, "loss": 0.042, "step": 13440 }, { "epoch": 9.69718817591925, "grad_norm": 0.15166892111301422, "learning_rate": 8.745302021165595e-05, "loss": 0.0431, "step": 13450 }, { "epoch": 9.704397981254505, "grad_norm": 0.12810677289962769, "learning_rate": 8.743110349013192e-05, "loss": 0.0487, "step": 13460 }, { "epoch": 9.711607786589763, "grad_norm": 0.1250915676355362, "learning_rate": 8.740917039497153e-05, "loss": 0.039, "step": 13470 }, { "epoch": 9.718817591925019, "grad_norm": 0.11790530383586884, "learning_rate": 8.738722093576906e-05, "loss": 0.0369, "step": 13480 }, { "epoch": 9.726027397260275, "grad_norm": 0.1323888897895813, "learning_rate": 8.736525512212597e-05, "loss": 0.0498, "step": 13490 }, { "epoch": 9.73323720259553, "grad_norm": 0.14156077802181244, "learning_rate": 8.734327296365084e-05, "loss": 0.0433, "step": 13500 }, { "epoch": 9.740447007930786, "grad_norm": 0.15011745691299438, "learning_rate": 8.732127446995939e-05, "loss": 0.0382, "step": 13510 }, { "epoch": 9.747656813266042, "grad_norm": 0.13490544259548187, "learning_rate": 8.729925965067454e-05, "loss": 0.04, "step": 13520 }, { "epoch": 9.754866618601298, "grad_norm": 0.14257988333702087, "learning_rate": 8.72772285154263e-05, "loss": 0.0345, "step": 13530 }, { "epoch": 9.762076423936554, "grad_norm": 0.12396596372127533, "learning_rate": 8.725518107385187e-05, "loss": 0.0362, "step": 13540 }, { "epoch": 9.76928622927181, "grad_norm": 0.16632238030433655, "learning_rate": 8.72331173355955e-05, "loss": 0.0465, "step": 13550 }, { "epoch": 9.776496034607066, "grad_norm": 0.22555115818977356, "learning_rate": 8.721103731030867e-05, "loss": 0.0411, "step": 13560 }, { "epoch": 9.783705839942321, "grad_norm": 0.1132405623793602, "learning_rate": 8.718894100764989e-05, "loss": 0.035, "step": 13570 }, { "epoch": 9.790915645277577, "grad_norm": 0.14794203639030457, "learning_rate": 8.716682843728485e-05, "loss": 0.0423, "step": 13580 }, { "epoch": 9.798125450612833, "grad_norm": 0.1739635318517685, "learning_rate": 8.714469960888634e-05, "loss": 0.0409, "step": 13590 }, { "epoch": 9.805335255948089, "grad_norm": 0.16667640209197998, "learning_rate": 8.712255453213427e-05, "loss": 0.038, "step": 13600 }, { "epoch": 9.812545061283345, "grad_norm": 0.12940531969070435, "learning_rate": 8.710039321671563e-05, "loss": 0.0373, "step": 13610 }, { "epoch": 9.8197548666186, "grad_norm": 0.15601137280464172, "learning_rate": 8.707821567232456e-05, "loss": 0.0416, "step": 13620 }, { "epoch": 9.826964671953856, "grad_norm": 0.17184831202030182, "learning_rate": 8.705602190866225e-05, "loss": 0.0383, "step": 13630 }, { "epoch": 9.834174477289114, "grad_norm": 0.18498975038528442, "learning_rate": 8.703381193543701e-05, "loss": 0.0407, "step": 13640 }, { "epoch": 9.84138428262437, "grad_norm": 0.19416339695453644, "learning_rate": 8.701158576236423e-05, "loss": 0.0402, "step": 13650 }, { "epoch": 9.848594087959626, "grad_norm": 0.25883784890174866, "learning_rate": 8.69893433991664e-05, "loss": 0.0457, "step": 13660 }, { "epoch": 9.855803893294881, "grad_norm": 0.1820354014635086, "learning_rate": 8.69670848555731e-05, "loss": 0.0386, "step": 13670 }, { "epoch": 9.863013698630137, "grad_norm": 0.14367546141147614, "learning_rate": 8.694481014132096e-05, "loss": 0.0379, "step": 13680 }, { "epoch": 9.870223503965393, "grad_norm": 0.15231461822986603, "learning_rate": 8.69225192661537e-05, "loss": 0.0414, "step": 13690 }, { "epoch": 9.877433309300649, "grad_norm": 0.12562736868858337, "learning_rate": 8.690021223982208e-05, "loss": 0.0415, "step": 13700 }, { "epoch": 9.884643114635905, "grad_norm": 0.1417606920003891, "learning_rate": 8.687788907208398e-05, "loss": 0.039, "step": 13710 }, { "epoch": 9.89185291997116, "grad_norm": 0.11819814145565033, "learning_rate": 8.685554977270431e-05, "loss": 0.0374, "step": 13720 }, { "epoch": 9.899062725306417, "grad_norm": 0.09528694301843643, "learning_rate": 8.683319435145503e-05, "loss": 0.0364, "step": 13730 }, { "epoch": 9.906272530641672, "grad_norm": 0.09821736067533493, "learning_rate": 8.681082281811517e-05, "loss": 0.036, "step": 13740 }, { "epoch": 9.913482335976928, "grad_norm": 0.1507730931043625, "learning_rate": 8.67884351824708e-05, "loss": 0.0433, "step": 13750 }, { "epoch": 9.920692141312184, "grad_norm": 0.1838659793138504, "learning_rate": 8.676603145431501e-05, "loss": 0.0413, "step": 13760 }, { "epoch": 9.92790194664744, "grad_norm": 0.13372042775154114, "learning_rate": 8.674361164344799e-05, "loss": 0.039, "step": 13770 }, { "epoch": 9.935111751982696, "grad_norm": 0.15603852272033691, "learning_rate": 8.672117575967688e-05, "loss": 0.0394, "step": 13780 }, { "epoch": 9.942321557317953, "grad_norm": 0.1271107941865921, "learning_rate": 8.669872381281595e-05, "loss": 0.0384, "step": 13790 }, { "epoch": 9.94953136265321, "grad_norm": 0.15719304978847504, "learning_rate": 8.667625581268639e-05, "loss": 0.0418, "step": 13800 }, { "epoch": 9.956741167988465, "grad_norm": 0.1501016467809677, "learning_rate": 8.665377176911651e-05, "loss": 0.0403, "step": 13810 }, { "epoch": 9.96395097332372, "grad_norm": 0.14710570871829987, "learning_rate": 8.663127169194159e-05, "loss": 0.0378, "step": 13820 }, { "epoch": 9.971160778658977, "grad_norm": 0.14104709029197693, "learning_rate": 8.660875559100389e-05, "loss": 0.04, "step": 13830 }, { "epoch": 9.978370583994232, "grad_norm": 0.14575888216495514, "learning_rate": 8.658622347615274e-05, "loss": 0.0445, "step": 13840 }, { "epoch": 9.985580389329488, "grad_norm": 0.16451489925384521, "learning_rate": 8.656367535724448e-05, "loss": 0.0349, "step": 13850 }, { "epoch": 9.992790194664744, "grad_norm": 0.13807399570941925, "learning_rate": 8.65411112441424e-05, "loss": 0.0382, "step": 13860 }, { "epoch": 10.0, "grad_norm": 0.1962505728006363, "learning_rate": 8.651853114671679e-05, "loss": 0.0391, "step": 13870 }, { "epoch": 10.007209805335256, "grad_norm": 0.1789633333683014, "learning_rate": 8.649593507484499e-05, "loss": 0.0361, "step": 13880 }, { "epoch": 10.014419610670512, "grad_norm": 0.12588505446910858, "learning_rate": 8.647332303841126e-05, "loss": 0.0364, "step": 13890 }, { "epoch": 10.021629416005768, "grad_norm": 0.14602340757846832, "learning_rate": 8.645069504730689e-05, "loss": 0.0426, "step": 13900 }, { "epoch": 10.028839221341023, "grad_norm": 0.13333895802497864, "learning_rate": 8.64280511114301e-05, "loss": 0.0396, "step": 13910 }, { "epoch": 10.03604902667628, "grad_norm": 0.16312016546726227, "learning_rate": 8.640539124068617e-05, "loss": 0.039, "step": 13920 }, { "epoch": 10.043258832011535, "grad_norm": 0.0845184400677681, "learning_rate": 8.638271544498727e-05, "loss": 0.0387, "step": 13930 }, { "epoch": 10.05046863734679, "grad_norm": 0.15383484959602356, "learning_rate": 8.636002373425257e-05, "loss": 0.0395, "step": 13940 }, { "epoch": 10.057678442682047, "grad_norm": 0.1307400017976761, "learning_rate": 8.633731611840817e-05, "loss": 0.0364, "step": 13950 }, { "epoch": 10.064888248017304, "grad_norm": 0.1593732237815857, "learning_rate": 8.631459260738717e-05, "loss": 0.0399, "step": 13960 }, { "epoch": 10.07209805335256, "grad_norm": 0.278616726398468, "learning_rate": 8.62918532111296e-05, "loss": 0.0382, "step": 13970 }, { "epoch": 10.079307858687816, "grad_norm": 0.13729272782802582, "learning_rate": 8.626909793958248e-05, "loss": 0.0397, "step": 13980 }, { "epoch": 10.086517664023072, "grad_norm": 0.13007783889770508, "learning_rate": 8.624632680269969e-05, "loss": 0.04, "step": 13990 }, { "epoch": 10.093727469358328, "grad_norm": 0.1736506074666977, "learning_rate": 8.622353981044212e-05, "loss": 0.0351, "step": 14000 }, { "epoch": 10.100937274693583, "grad_norm": 0.1790018230676651, "learning_rate": 8.620073697277757e-05, "loss": 0.0338, "step": 14010 }, { "epoch": 10.10814708002884, "grad_norm": 0.1933744251728058, "learning_rate": 8.617791829968079e-05, "loss": 0.0448, "step": 14020 }, { "epoch": 10.115356885364095, "grad_norm": 0.11191431432962418, "learning_rate": 8.615508380113344e-05, "loss": 0.0365, "step": 14030 }, { "epoch": 10.122566690699351, "grad_norm": 0.12426866590976715, "learning_rate": 8.613223348712408e-05, "loss": 0.0358, "step": 14040 }, { "epoch": 10.129776496034607, "grad_norm": 0.10764645040035248, "learning_rate": 8.610936736764824e-05, "loss": 0.0366, "step": 14050 }, { "epoch": 10.136986301369863, "grad_norm": 0.09998686611652374, "learning_rate": 8.608648545270833e-05, "loss": 0.0369, "step": 14060 }, { "epoch": 10.144196106705119, "grad_norm": 0.14933665096759796, "learning_rate": 8.606358775231366e-05, "loss": 0.0433, "step": 14070 }, { "epoch": 10.151405912040374, "grad_norm": 0.11756637692451477, "learning_rate": 8.60406742764805e-05, "loss": 0.0416, "step": 14080 }, { "epoch": 10.15861571737563, "grad_norm": 0.17583613097667694, "learning_rate": 8.601774503523195e-05, "loss": 0.0393, "step": 14090 }, { "epoch": 10.165825522710886, "grad_norm": 0.14836746454238892, "learning_rate": 8.599480003859805e-05, "loss": 0.0364, "step": 14100 }, { "epoch": 10.173035328046144, "grad_norm": 0.16865059733390808, "learning_rate": 8.597183929661573e-05, "loss": 0.0383, "step": 14110 }, { "epoch": 10.1802451333814, "grad_norm": 0.10921519249677658, "learning_rate": 8.594886281932879e-05, "loss": 0.0332, "step": 14120 }, { "epoch": 10.187454938716655, "grad_norm": 0.12655028700828552, "learning_rate": 8.59258706167879e-05, "loss": 0.0365, "step": 14130 }, { "epoch": 10.194664744051911, "grad_norm": 0.13513021171092987, "learning_rate": 8.590286269905068e-05, "loss": 0.0443, "step": 14140 }, { "epoch": 10.201874549387167, "grad_norm": 0.17244680225849152, "learning_rate": 8.587983907618154e-05, "loss": 0.0415, "step": 14150 }, { "epoch": 10.209084354722423, "grad_norm": 0.19052739441394806, "learning_rate": 8.585679975825178e-05, "loss": 0.0429, "step": 14160 }, { "epoch": 10.216294160057679, "grad_norm": 0.15007026493549347, "learning_rate": 8.583374475533962e-05, "loss": 0.0346, "step": 14170 }, { "epoch": 10.223503965392934, "grad_norm": 0.12205681204795837, "learning_rate": 8.581067407753009e-05, "loss": 0.0356, "step": 14180 }, { "epoch": 10.23071377072819, "grad_norm": 0.17930348217487335, "learning_rate": 8.578758773491507e-05, "loss": 0.0362, "step": 14190 }, { "epoch": 10.237923576063446, "grad_norm": 0.13267379999160767, "learning_rate": 8.576448573759332e-05, "loss": 0.0413, "step": 14200 }, { "epoch": 10.245133381398702, "grad_norm": 0.11070053279399872, "learning_rate": 8.574136809567044e-05, "loss": 0.038, "step": 14210 }, { "epoch": 10.252343186733958, "grad_norm": 0.18272890150547028, "learning_rate": 8.57182348192589e-05, "loss": 0.0377, "step": 14220 }, { "epoch": 10.259552992069214, "grad_norm": 0.12239472568035126, "learning_rate": 8.569508591847792e-05, "loss": 0.0402, "step": 14230 }, { "epoch": 10.26676279740447, "grad_norm": 0.16118010878562927, "learning_rate": 8.567192140345367e-05, "loss": 0.0365, "step": 14240 }, { "epoch": 10.273972602739725, "grad_norm": 0.16315993666648865, "learning_rate": 8.564874128431906e-05, "loss": 0.0374, "step": 14250 }, { "epoch": 10.281182408074981, "grad_norm": 0.17636257410049438, "learning_rate": 8.562554557121389e-05, "loss": 0.0436, "step": 14260 }, { "epoch": 10.288392213410237, "grad_norm": 0.13529568910598755, "learning_rate": 8.560233427428475e-05, "loss": 0.0362, "step": 14270 }, { "epoch": 10.295602018745495, "grad_norm": 0.125763401389122, "learning_rate": 8.557910740368503e-05, "loss": 0.0364, "step": 14280 }, { "epoch": 10.30281182408075, "grad_norm": 0.11785879731178284, "learning_rate": 8.555586496957495e-05, "loss": 0.0354, "step": 14290 }, { "epoch": 10.310021629416006, "grad_norm": 0.1974257528781891, "learning_rate": 8.553260698212155e-05, "loss": 0.0346, "step": 14300 }, { "epoch": 10.317231434751262, "grad_norm": 0.12194450199604034, "learning_rate": 8.550933345149868e-05, "loss": 0.0312, "step": 14310 }, { "epoch": 10.324441240086518, "grad_norm": 0.13121402263641357, "learning_rate": 8.548604438788696e-05, "loss": 0.0368, "step": 14320 }, { "epoch": 10.331651045421774, "grad_norm": 0.15631070733070374, "learning_rate": 8.546273980147383e-05, "loss": 0.0448, "step": 14330 }, { "epoch": 10.33886085075703, "grad_norm": 0.1219649389386177, "learning_rate": 8.543941970245348e-05, "loss": 0.0371, "step": 14340 }, { "epoch": 10.346070656092285, "grad_norm": 0.10055261105298996, "learning_rate": 8.541608410102693e-05, "loss": 0.0446, "step": 14350 }, { "epoch": 10.353280461427541, "grad_norm": 0.15412025153636932, "learning_rate": 8.539273300740195e-05, "loss": 0.0437, "step": 14360 }, { "epoch": 10.360490266762797, "grad_norm": 0.12562178075313568, "learning_rate": 8.536936643179313e-05, "loss": 0.0418, "step": 14370 }, { "epoch": 10.367700072098053, "grad_norm": 0.1701778620481491, "learning_rate": 8.534598438442179e-05, "loss": 0.0414, "step": 14380 }, { "epoch": 10.374909877433309, "grad_norm": 0.17757295072078705, "learning_rate": 8.532258687551603e-05, "loss": 0.0406, "step": 14390 }, { "epoch": 10.382119682768565, "grad_norm": 0.1653238981962204, "learning_rate": 8.529917391531071e-05, "loss": 0.0374, "step": 14400 }, { "epoch": 10.38932948810382, "grad_norm": 0.14651979506015778, "learning_rate": 8.527574551404747e-05, "loss": 0.0382, "step": 14410 }, { "epoch": 10.396539293439076, "grad_norm": 0.15542174875736237, "learning_rate": 8.525230168197468e-05, "loss": 0.0414, "step": 14420 }, { "epoch": 10.403749098774334, "grad_norm": 0.12626755237579346, "learning_rate": 8.522884242934745e-05, "loss": 0.0384, "step": 14430 }, { "epoch": 10.41095890410959, "grad_norm": 0.16392995417118073, "learning_rate": 8.520536776642768e-05, "loss": 0.0355, "step": 14440 }, { "epoch": 10.418168709444846, "grad_norm": 0.12750908732414246, "learning_rate": 8.5181877703484e-05, "loss": 0.0398, "step": 14450 }, { "epoch": 10.425378514780101, "grad_norm": 0.1355070024728775, "learning_rate": 8.51583722507917e-05, "loss": 0.0415, "step": 14460 }, { "epoch": 10.432588320115357, "grad_norm": 0.16549859941005707, "learning_rate": 8.513485141863293e-05, "loss": 0.0392, "step": 14470 }, { "epoch": 10.439798125450613, "grad_norm": 0.15128467977046967, "learning_rate": 8.511131521729647e-05, "loss": 0.039, "step": 14480 }, { "epoch": 10.447007930785869, "grad_norm": 0.15879730880260468, "learning_rate": 8.508776365707787e-05, "loss": 0.0424, "step": 14490 }, { "epoch": 10.454217736121125, "grad_norm": 0.17622633278369904, "learning_rate": 8.506419674827934e-05, "loss": 0.0387, "step": 14500 }, { "epoch": 10.46142754145638, "grad_norm": 0.1504887044429779, "learning_rate": 8.50406145012099e-05, "loss": 0.0368, "step": 14510 }, { "epoch": 10.468637346791636, "grad_norm": 0.15525031089782715, "learning_rate": 8.501701692618519e-05, "loss": 0.0363, "step": 14520 }, { "epoch": 10.475847152126892, "grad_norm": 0.12874923646450043, "learning_rate": 8.499340403352761e-05, "loss": 0.0357, "step": 14530 }, { "epoch": 10.483056957462148, "grad_norm": 0.15233346819877625, "learning_rate": 8.496977583356623e-05, "loss": 0.0418, "step": 14540 }, { "epoch": 10.490266762797404, "grad_norm": 0.11631929874420166, "learning_rate": 8.494613233663684e-05, "loss": 0.0327, "step": 14550 }, { "epoch": 10.49747656813266, "grad_norm": 0.1269131451845169, "learning_rate": 8.492247355308189e-05, "loss": 0.0438, "step": 14560 }, { "epoch": 10.504686373467916, "grad_norm": 0.14460071921348572, "learning_rate": 8.489879949325056e-05, "loss": 0.04, "step": 14570 }, { "epoch": 10.511896178803172, "grad_norm": 0.1541164666414261, "learning_rate": 8.487511016749868e-05, "loss": 0.0415, "step": 14580 }, { "epoch": 10.519105984138427, "grad_norm": 0.14810122549533844, "learning_rate": 8.485140558618874e-05, "loss": 0.0394, "step": 14590 }, { "epoch": 10.526315789473685, "grad_norm": 0.18111000955104828, "learning_rate": 8.482768575968995e-05, "loss": 0.0399, "step": 14600 }, { "epoch": 10.53352559480894, "grad_norm": 0.1888503134250641, "learning_rate": 8.480395069837818e-05, "loss": 0.0364, "step": 14610 }, { "epoch": 10.540735400144197, "grad_norm": 0.10001499205827713, "learning_rate": 8.478020041263595e-05, "loss": 0.0392, "step": 14620 }, { "epoch": 10.547945205479452, "grad_norm": 0.1521788388490677, "learning_rate": 8.475643491285242e-05, "loss": 0.0389, "step": 14630 }, { "epoch": 10.555155010814708, "grad_norm": 0.1482938528060913, "learning_rate": 8.473265420942345e-05, "loss": 0.0422, "step": 14640 }, { "epoch": 10.562364816149964, "grad_norm": 0.1602165848016739, "learning_rate": 8.470885831275151e-05, "loss": 0.0375, "step": 14650 }, { "epoch": 10.56957462148522, "grad_norm": 0.14845585823059082, "learning_rate": 8.468504723324574e-05, "loss": 0.0328, "step": 14660 }, { "epoch": 10.576784426820476, "grad_norm": 0.12157813459634781, "learning_rate": 8.466122098132193e-05, "loss": 0.0344, "step": 14670 }, { "epoch": 10.583994232155732, "grad_norm": 0.15701718628406525, "learning_rate": 8.463737956740245e-05, "loss": 0.0358, "step": 14680 }, { "epoch": 10.591204037490987, "grad_norm": 0.09241660684347153, "learning_rate": 8.461352300191639e-05, "loss": 0.0327, "step": 14690 }, { "epoch": 10.598413842826243, "grad_norm": 0.1921989470720291, "learning_rate": 8.45896512952994e-05, "loss": 0.0447, "step": 14700 }, { "epoch": 10.6056236481615, "grad_norm": 0.17056973278522491, "learning_rate": 8.456576445799377e-05, "loss": 0.0436, "step": 14710 }, { "epoch": 10.612833453496755, "grad_norm": 0.16990232467651367, "learning_rate": 8.454186250044844e-05, "loss": 0.0359, "step": 14720 }, { "epoch": 10.62004325883201, "grad_norm": 0.12821064889431, "learning_rate": 8.451794543311892e-05, "loss": 0.0356, "step": 14730 }, { "epoch": 10.627253064167267, "grad_norm": 0.14737452566623688, "learning_rate": 8.449401326646736e-05, "loss": 0.0417, "step": 14740 }, { "epoch": 10.634462869502524, "grad_norm": 0.11265460401773453, "learning_rate": 8.447006601096248e-05, "loss": 0.042, "step": 14750 }, { "epoch": 10.64167267483778, "grad_norm": 0.13777145743370056, "learning_rate": 8.444610367707964e-05, "loss": 0.035, "step": 14760 }, { "epoch": 10.648882480173036, "grad_norm": 0.1624877005815506, "learning_rate": 8.442212627530078e-05, "loss": 0.0403, "step": 14770 }, { "epoch": 10.656092285508292, "grad_norm": 0.17795111238956451, "learning_rate": 8.439813381611441e-05, "loss": 0.0413, "step": 14780 }, { "epoch": 10.663302090843548, "grad_norm": 0.12841476500034332, "learning_rate": 8.437412631001567e-05, "loss": 0.0374, "step": 14790 }, { "epoch": 10.670511896178803, "grad_norm": 0.10317642241716385, "learning_rate": 8.435010376750626e-05, "loss": 0.0412, "step": 14800 }, { "epoch": 10.67772170151406, "grad_norm": 0.13674573600292206, "learning_rate": 8.432606619909442e-05, "loss": 0.0302, "step": 14810 }, { "epoch": 10.684931506849315, "grad_norm": 0.13665446639060974, "learning_rate": 8.430201361529506e-05, "loss": 0.0377, "step": 14820 }, { "epoch": 10.692141312184571, "grad_norm": 0.17271071672439575, "learning_rate": 8.427794602662954e-05, "loss": 0.0368, "step": 14830 }, { "epoch": 10.699351117519827, "grad_norm": 0.13833950459957123, "learning_rate": 8.425386344362586e-05, "loss": 0.039, "step": 14840 }, { "epoch": 10.706560922855083, "grad_norm": 0.09703443199396133, "learning_rate": 8.422976587681859e-05, "loss": 0.0345, "step": 14850 }, { "epoch": 10.713770728190338, "grad_norm": 0.16897016763687134, "learning_rate": 8.42056533367488e-05, "loss": 0.0395, "step": 14860 }, { "epoch": 10.720980533525594, "grad_norm": 0.17638637125492096, "learning_rate": 8.41815258339641e-05, "loss": 0.0398, "step": 14870 }, { "epoch": 10.72819033886085, "grad_norm": 0.11737645417451859, "learning_rate": 8.415738337901874e-05, "loss": 0.0344, "step": 14880 }, { "epoch": 10.735400144196106, "grad_norm": 0.1351664662361145, "learning_rate": 8.413322598247342e-05, "loss": 0.038, "step": 14890 }, { "epoch": 10.742609949531362, "grad_norm": 0.10940893739461899, "learning_rate": 8.41090536548954e-05, "loss": 0.0397, "step": 14900 }, { "epoch": 10.749819754866618, "grad_norm": 0.15871542692184448, "learning_rate": 8.408486640685849e-05, "loss": 0.0341, "step": 14910 }, { "epoch": 10.757029560201875, "grad_norm": 0.14266400039196014, "learning_rate": 8.4060664248943e-05, "loss": 0.0384, "step": 14920 }, { "epoch": 10.764239365537131, "grad_norm": 0.14138789474964142, "learning_rate": 8.40364471917358e-05, "loss": 0.0446, "step": 14930 }, { "epoch": 10.771449170872387, "grad_norm": 0.16437433660030365, "learning_rate": 8.401221524583024e-05, "loss": 0.0365, "step": 14940 }, { "epoch": 10.778658976207643, "grad_norm": 0.09847448766231537, "learning_rate": 8.398796842182619e-05, "loss": 0.035, "step": 14950 }, { "epoch": 10.785868781542899, "grad_norm": 0.14081817865371704, "learning_rate": 8.396370673033006e-05, "loss": 0.0342, "step": 14960 }, { "epoch": 10.793078586878154, "grad_norm": 0.14440909028053284, "learning_rate": 8.39394301819547e-05, "loss": 0.0325, "step": 14970 }, { "epoch": 10.80028839221341, "grad_norm": 0.1456538736820221, "learning_rate": 8.391513878731949e-05, "loss": 0.0352, "step": 14980 }, { "epoch": 10.807498197548666, "grad_norm": 0.14026585221290588, "learning_rate": 8.389083255705037e-05, "loss": 0.0383, "step": 14990 }, { "epoch": 10.814708002883922, "grad_norm": 0.1441844403743744, "learning_rate": 8.386651150177968e-05, "loss": 0.0397, "step": 15000 }, { "epoch": 10.821917808219178, "grad_norm": 0.13172776997089386, "learning_rate": 8.384217563214627e-05, "loss": 0.037, "step": 15010 }, { "epoch": 10.829127613554434, "grad_norm": 0.11858581006526947, "learning_rate": 8.381782495879546e-05, "loss": 0.0348, "step": 15020 }, { "epoch": 10.83633741888969, "grad_norm": 0.13452984392642975, "learning_rate": 8.37934594923791e-05, "loss": 0.035, "step": 15030 }, { "epoch": 10.843547224224945, "grad_norm": 0.13610202074050903, "learning_rate": 8.376907924355546e-05, "loss": 0.0429, "step": 15040 }, { "epoch": 10.850757029560201, "grad_norm": 0.14667263627052307, "learning_rate": 8.374468422298926e-05, "loss": 0.0389, "step": 15050 }, { "epoch": 10.857966834895457, "grad_norm": 0.12266140431165695, "learning_rate": 8.372027444135176e-05, "loss": 0.0337, "step": 15060 }, { "epoch": 10.865176640230715, "grad_norm": 0.12592406570911407, "learning_rate": 8.36958499093206e-05, "loss": 0.0371, "step": 15070 }, { "epoch": 10.87238644556597, "grad_norm": 0.14648805558681488, "learning_rate": 8.367141063757988e-05, "loss": 0.0388, "step": 15080 }, { "epoch": 10.879596250901226, "grad_norm": 0.1674196869134903, "learning_rate": 8.364695663682022e-05, "loss": 0.0379, "step": 15090 }, { "epoch": 10.886806056236482, "grad_norm": 0.13862858712673187, "learning_rate": 8.362248791773858e-05, "loss": 0.0319, "step": 15100 }, { "epoch": 10.894015861571738, "grad_norm": 0.11675728112459183, "learning_rate": 8.359800449103842e-05, "loss": 0.0397, "step": 15110 }, { "epoch": 10.901225666906994, "grad_norm": 0.13923151791095734, "learning_rate": 8.357350636742967e-05, "loss": 0.0441, "step": 15120 }, { "epoch": 10.90843547224225, "grad_norm": 0.16460084915161133, "learning_rate": 8.354899355762858e-05, "loss": 0.0387, "step": 15130 }, { "epoch": 10.915645277577505, "grad_norm": 0.1245555728673935, "learning_rate": 8.352446607235791e-05, "loss": 0.037, "step": 15140 }, { "epoch": 10.922855082912761, "grad_norm": 0.1374129354953766, "learning_rate": 8.349992392234683e-05, "loss": 0.0409, "step": 15150 }, { "epoch": 10.930064888248017, "grad_norm": 0.14065781235694885, "learning_rate": 8.347536711833088e-05, "loss": 0.0322, "step": 15160 }, { "epoch": 10.937274693583273, "grad_norm": 0.10348035395145416, "learning_rate": 8.345079567105206e-05, "loss": 0.0334, "step": 15170 }, { "epoch": 10.944484498918529, "grad_norm": 0.1215464174747467, "learning_rate": 8.342620959125875e-05, "loss": 0.0361, "step": 15180 }, { "epoch": 10.951694304253785, "grad_norm": 0.11834301799535751, "learning_rate": 8.340160888970574e-05, "loss": 0.0322, "step": 15190 }, { "epoch": 10.95890410958904, "grad_norm": 0.15854349732398987, "learning_rate": 8.337699357715422e-05, "loss": 0.0387, "step": 15200 }, { "epoch": 10.966113914924296, "grad_norm": 0.16800342500209808, "learning_rate": 8.335236366437174e-05, "loss": 0.0398, "step": 15210 }, { "epoch": 10.973323720259552, "grad_norm": 0.08649777621030807, "learning_rate": 8.332771916213227e-05, "loss": 0.0369, "step": 15220 }, { "epoch": 10.980533525594808, "grad_norm": 0.14426332712173462, "learning_rate": 8.330306008121618e-05, "loss": 0.0327, "step": 15230 }, { "epoch": 10.987743330930066, "grad_norm": 0.16838978230953217, "learning_rate": 8.327838643241014e-05, "loss": 0.0363, "step": 15240 }, { "epoch": 10.994953136265321, "grad_norm": 0.14584192633628845, "learning_rate": 8.32536982265073e-05, "loss": 0.0439, "step": 15250 }, { "epoch": 11.002162941600577, "grad_norm": 0.11142737418413162, "learning_rate": 8.32289954743071e-05, "loss": 0.0346, "step": 15260 }, { "epoch": 11.009372746935833, "grad_norm": 0.1204177662730217, "learning_rate": 8.320427818661532e-05, "loss": 0.04, "step": 15270 }, { "epoch": 11.016582552271089, "grad_norm": 0.14059604704380035, "learning_rate": 8.317954637424419e-05, "loss": 0.0421, "step": 15280 }, { "epoch": 11.023792357606345, "grad_norm": 0.133451908826828, "learning_rate": 8.315480004801221e-05, "loss": 0.0351, "step": 15290 }, { "epoch": 11.0310021629416, "grad_norm": 0.12935011088848114, "learning_rate": 8.31300392187443e-05, "loss": 0.0397, "step": 15300 }, { "epoch": 11.038211968276856, "grad_norm": 0.1392558515071869, "learning_rate": 8.31052638972717e-05, "loss": 0.0355, "step": 15310 }, { "epoch": 11.045421773612112, "grad_norm": 0.13789504766464233, "learning_rate": 8.30804740944319e-05, "loss": 0.0422, "step": 15320 }, { "epoch": 11.052631578947368, "grad_norm": 0.15124662220478058, "learning_rate": 8.305566982106887e-05, "loss": 0.0452, "step": 15330 }, { "epoch": 11.059841384282624, "grad_norm": 0.15702137351036072, "learning_rate": 8.303085108803282e-05, "loss": 0.0373, "step": 15340 }, { "epoch": 11.06705118961788, "grad_norm": 0.14684437215328217, "learning_rate": 8.300601790618032e-05, "loss": 0.0364, "step": 15350 }, { "epoch": 11.074260994953136, "grad_norm": 0.14086760580539703, "learning_rate": 8.298117028637423e-05, "loss": 0.0379, "step": 15360 }, { "epoch": 11.081470800288391, "grad_norm": 0.12256602942943573, "learning_rate": 8.295630823948375e-05, "loss": 0.0337, "step": 15370 }, { "epoch": 11.088680605623647, "grad_norm": 0.1847156435251236, "learning_rate": 8.293143177638438e-05, "loss": 0.0336, "step": 15380 }, { "epoch": 11.095890410958905, "grad_norm": 0.15821923315525055, "learning_rate": 8.290654090795795e-05, "loss": 0.0324, "step": 15390 }, { "epoch": 11.10310021629416, "grad_norm": 0.14824330806732178, "learning_rate": 8.288163564509253e-05, "loss": 0.0424, "step": 15400 }, { "epoch": 11.110310021629417, "grad_norm": 0.10920871049165726, "learning_rate": 8.285671599868257e-05, "loss": 0.0435, "step": 15410 }, { "epoch": 11.117519826964672, "grad_norm": 0.16078466176986694, "learning_rate": 8.283178197962875e-05, "loss": 0.0383, "step": 15420 }, { "epoch": 11.124729632299928, "grad_norm": 0.14540259540081024, "learning_rate": 8.280683359883808e-05, "loss": 0.0362, "step": 15430 }, { "epoch": 11.131939437635184, "grad_norm": 0.12862740457057953, "learning_rate": 8.278187086722377e-05, "loss": 0.0331, "step": 15440 }, { "epoch": 11.13914924297044, "grad_norm": 0.17877739667892456, "learning_rate": 8.275689379570544e-05, "loss": 0.0406, "step": 15450 }, { "epoch": 11.146359048305696, "grad_norm": 0.17907947301864624, "learning_rate": 8.273190239520887e-05, "loss": 0.0394, "step": 15460 }, { "epoch": 11.153568853640952, "grad_norm": 0.18210311233997345, "learning_rate": 8.270689667666616e-05, "loss": 0.0315, "step": 15470 }, { "epoch": 11.160778658976207, "grad_norm": 0.17031508684158325, "learning_rate": 8.268187665101563e-05, "loss": 0.0379, "step": 15480 }, { "epoch": 11.167988464311463, "grad_norm": 0.1406349539756775, "learning_rate": 8.265684232920194e-05, "loss": 0.0342, "step": 15490 }, { "epoch": 11.17519826964672, "grad_norm": 0.11884036660194397, "learning_rate": 8.263179372217592e-05, "loss": 0.0394, "step": 15500 }, { "epoch": 11.182408074981975, "grad_norm": 0.10274189710617065, "learning_rate": 8.260673084089468e-05, "loss": 0.0345, "step": 15510 }, { "epoch": 11.18961788031723, "grad_norm": 0.12161222845315933, "learning_rate": 8.25816536963216e-05, "loss": 0.0346, "step": 15520 }, { "epoch": 11.196827685652487, "grad_norm": 0.11618877947330475, "learning_rate": 8.255656229942626e-05, "loss": 0.039, "step": 15530 }, { "epoch": 11.204037490987742, "grad_norm": 0.1665947437286377, "learning_rate": 8.253145666118447e-05, "loss": 0.0422, "step": 15540 }, { "epoch": 11.211247296323, "grad_norm": 0.14121820032596588, "learning_rate": 8.250633679257834e-05, "loss": 0.039, "step": 15550 }, { "epoch": 11.218457101658256, "grad_norm": 0.14407341182231903, "learning_rate": 8.24812027045961e-05, "loss": 0.0375, "step": 15560 }, { "epoch": 11.225666906993512, "grad_norm": 0.1733572632074356, "learning_rate": 8.245605440823226e-05, "loss": 0.0324, "step": 15570 }, { "epoch": 11.232876712328768, "grad_norm": 0.1631636917591095, "learning_rate": 8.243089191448759e-05, "loss": 0.0334, "step": 15580 }, { "epoch": 11.240086517664023, "grad_norm": 0.15487822890281677, "learning_rate": 8.240571523436896e-05, "loss": 0.0374, "step": 15590 }, { "epoch": 11.24729632299928, "grad_norm": 0.12587745487689972, "learning_rate": 8.238052437888955e-05, "loss": 0.0372, "step": 15600 }, { "epoch": 11.254506128334535, "grad_norm": 0.12978419661521912, "learning_rate": 8.235531935906866e-05, "loss": 0.0343, "step": 15610 }, { "epoch": 11.261715933669791, "grad_norm": 0.13188157975673676, "learning_rate": 8.233010018593185e-05, "loss": 0.0353, "step": 15620 }, { "epoch": 11.268925739005047, "grad_norm": 0.07674876600503922, "learning_rate": 8.230486687051082e-05, "loss": 0.0325, "step": 15630 }, { "epoch": 11.276135544340303, "grad_norm": 0.14698129892349243, "learning_rate": 8.22796194238435e-05, "loss": 0.0321, "step": 15640 }, { "epoch": 11.283345349675558, "grad_norm": 0.13098052144050598, "learning_rate": 8.225435785697397e-05, "loss": 0.0365, "step": 15650 }, { "epoch": 11.290555155010814, "grad_norm": 0.1324511170387268, "learning_rate": 8.222908218095252e-05, "loss": 0.0366, "step": 15660 }, { "epoch": 11.29776496034607, "grad_norm": 0.1493748277425766, "learning_rate": 8.220379240683553e-05, "loss": 0.0373, "step": 15670 }, { "epoch": 11.304974765681326, "grad_norm": 0.1250842958688736, "learning_rate": 8.217848854568566e-05, "loss": 0.0368, "step": 15680 }, { "epoch": 11.312184571016582, "grad_norm": 0.16619424521923065, "learning_rate": 8.215317060857167e-05, "loss": 0.0347, "step": 15690 }, { "epoch": 11.319394376351838, "grad_norm": 0.11821813881397247, "learning_rate": 8.212783860656847e-05, "loss": 0.0363, "step": 15700 }, { "epoch": 11.326604181687095, "grad_norm": 0.15882349014282227, "learning_rate": 8.210249255075717e-05, "loss": 0.0394, "step": 15710 }, { "epoch": 11.333813987022351, "grad_norm": 0.13352179527282715, "learning_rate": 8.207713245222496e-05, "loss": 0.035, "step": 15720 }, { "epoch": 11.341023792357607, "grad_norm": 0.1293746531009674, "learning_rate": 8.205175832206522e-05, "loss": 0.0335, "step": 15730 }, { "epoch": 11.348233597692863, "grad_norm": 0.17428146302700043, "learning_rate": 8.202637017137747e-05, "loss": 0.0367, "step": 15740 }, { "epoch": 11.355443403028119, "grad_norm": 0.1854015588760376, "learning_rate": 8.200096801126734e-05, "loss": 0.0383, "step": 15750 }, { "epoch": 11.362653208363374, "grad_norm": 0.1455773264169693, "learning_rate": 8.197555185284658e-05, "loss": 0.0376, "step": 15760 }, { "epoch": 11.36986301369863, "grad_norm": 0.14676539599895477, "learning_rate": 8.19501217072331e-05, "loss": 0.0376, "step": 15770 }, { "epoch": 11.377072819033886, "grad_norm": 0.19242095947265625, "learning_rate": 8.192467758555091e-05, "loss": 0.0411, "step": 15780 }, { "epoch": 11.384282624369142, "grad_norm": 0.1638118177652359, "learning_rate": 8.189921949893014e-05, "loss": 0.0392, "step": 15790 }, { "epoch": 11.391492429704398, "grad_norm": 0.1944098323583603, "learning_rate": 8.187374745850702e-05, "loss": 0.0441, "step": 15800 }, { "epoch": 11.398702235039654, "grad_norm": 0.13336127996444702, "learning_rate": 8.184826147542387e-05, "loss": 0.0374, "step": 15810 }, { "epoch": 11.40591204037491, "grad_norm": 0.16366200149059296, "learning_rate": 8.18227615608291e-05, "loss": 0.0353, "step": 15820 }, { "epoch": 11.413121845710165, "grad_norm": 0.18148519098758698, "learning_rate": 8.179724772587732e-05, "loss": 0.0392, "step": 15830 }, { "epoch": 11.420331651045421, "grad_norm": 0.14567525684833527, "learning_rate": 8.177171998172908e-05, "loss": 0.0349, "step": 15840 }, { "epoch": 11.427541456380677, "grad_norm": 0.12077885121107101, "learning_rate": 8.174617833955112e-05, "loss": 0.0402, "step": 15850 }, { "epoch": 11.434751261715933, "grad_norm": 0.13935549557209015, "learning_rate": 8.172062281051618e-05, "loss": 0.0347, "step": 15860 }, { "epoch": 11.44196106705119, "grad_norm": 0.14668941497802734, "learning_rate": 8.169505340580316e-05, "loss": 0.0328, "step": 15870 }, { "epoch": 11.449170872386446, "grad_norm": 0.22911950945854187, "learning_rate": 8.166947013659696e-05, "loss": 0.0396, "step": 15880 }, { "epoch": 11.456380677721702, "grad_norm": 0.1225552037358284, "learning_rate": 8.164387301408858e-05, "loss": 0.0362, "step": 15890 }, { "epoch": 11.463590483056958, "grad_norm": 0.14916332066059113, "learning_rate": 8.161826204947507e-05, "loss": 0.0363, "step": 15900 }, { "epoch": 11.470800288392214, "grad_norm": 0.2107640504837036, "learning_rate": 8.159263725395954e-05, "loss": 0.0359, "step": 15910 }, { "epoch": 11.47801009372747, "grad_norm": 0.12816165387630463, "learning_rate": 8.156699863875115e-05, "loss": 0.0343, "step": 15920 }, { "epoch": 11.485219899062725, "grad_norm": 0.2118701934814453, "learning_rate": 8.154134621506508e-05, "loss": 0.0386, "step": 15930 }, { "epoch": 11.492429704397981, "grad_norm": 0.17323605716228485, "learning_rate": 8.15156799941226e-05, "loss": 0.0425, "step": 15940 }, { "epoch": 11.499639509733237, "grad_norm": 0.12797468900680542, "learning_rate": 8.148999998715096e-05, "loss": 0.035, "step": 15950 }, { "epoch": 11.506849315068493, "grad_norm": 0.12494250386953354, "learning_rate": 8.146430620538348e-05, "loss": 0.0407, "step": 15960 }, { "epoch": 11.514059120403749, "grad_norm": 0.17467348277568817, "learning_rate": 8.143859866005949e-05, "loss": 0.0417, "step": 15970 }, { "epoch": 11.521268925739005, "grad_norm": 0.15675053000450134, "learning_rate": 8.141287736242438e-05, "loss": 0.0434, "step": 15980 }, { "epoch": 11.52847873107426, "grad_norm": 0.15131120383739471, "learning_rate": 8.138714232372945e-05, "loss": 0.0347, "step": 15990 }, { "epoch": 11.535688536409516, "grad_norm": 0.13043326139450073, "learning_rate": 8.136139355523213e-05, "loss": 0.0354, "step": 16000 }, { "epoch": 11.542898341744772, "grad_norm": 0.11443059891462326, "learning_rate": 8.13356310681958e-05, "loss": 0.0377, "step": 16010 }, { "epoch": 11.550108147080028, "grad_norm": 0.14949573576450348, "learning_rate": 8.130985487388982e-05, "loss": 0.0314, "step": 16020 }, { "epoch": 11.557317952415286, "grad_norm": 0.14263226091861725, "learning_rate": 8.12840649835896e-05, "loss": 0.0337, "step": 16030 }, { "epoch": 11.564527757750541, "grad_norm": 0.13902650773525238, "learning_rate": 8.125826140857653e-05, "loss": 0.0384, "step": 16040 }, { "epoch": 11.571737563085797, "grad_norm": 0.1673775017261505, "learning_rate": 8.123244416013792e-05, "loss": 0.0393, "step": 16050 }, { "epoch": 11.578947368421053, "grad_norm": 0.14035378396511078, "learning_rate": 8.120661324956716e-05, "loss": 0.0392, "step": 16060 }, { "epoch": 11.586157173756309, "grad_norm": 0.14218568801879883, "learning_rate": 8.118076868816353e-05, "loss": 0.034, "step": 16070 }, { "epoch": 11.593366979091565, "grad_norm": 0.13723637163639069, "learning_rate": 8.115491048723234e-05, "loss": 0.0399, "step": 16080 }, { "epoch": 11.60057678442682, "grad_norm": 0.1349852830171585, "learning_rate": 8.112903865808486e-05, "loss": 0.037, "step": 16090 }, { "epoch": 11.607786589762076, "grad_norm": 0.1504932940006256, "learning_rate": 8.110315321203826e-05, "loss": 0.0354, "step": 16100 }, { "epoch": 11.614996395097332, "grad_norm": 0.11844120919704437, "learning_rate": 8.107725416041575e-05, "loss": 0.0321, "step": 16110 }, { "epoch": 11.622206200432588, "grad_norm": 0.15236423909664154, "learning_rate": 8.105134151454645e-05, "loss": 0.0393, "step": 16120 }, { "epoch": 11.629416005767844, "grad_norm": 0.13082243502140045, "learning_rate": 8.102541528576544e-05, "loss": 0.0391, "step": 16130 }, { "epoch": 11.6366258111031, "grad_norm": 0.15288962423801422, "learning_rate": 8.099947548541371e-05, "loss": 0.0334, "step": 16140 }, { "epoch": 11.643835616438356, "grad_norm": 0.19772183895111084, "learning_rate": 8.097352212483824e-05, "loss": 0.0447, "step": 16150 }, { "epoch": 11.651045421773611, "grad_norm": 0.1619817465543747, "learning_rate": 8.094755521539187e-05, "loss": 0.035, "step": 16160 }, { "epoch": 11.658255227108867, "grad_norm": 0.13319198787212372, "learning_rate": 8.092157476843346e-05, "loss": 0.036, "step": 16170 }, { "epoch": 11.665465032444125, "grad_norm": 0.14009223878383636, "learning_rate": 8.089558079532769e-05, "loss": 0.0394, "step": 16180 }, { "epoch": 11.67267483777938, "grad_norm": 0.15581132471561432, "learning_rate": 8.086957330744522e-05, "loss": 0.037, "step": 16190 }, { "epoch": 11.679884643114637, "grad_norm": 0.16084317862987518, "learning_rate": 8.084355231616265e-05, "loss": 0.0376, "step": 16200 }, { "epoch": 11.687094448449892, "grad_norm": 0.1313025951385498, "learning_rate": 8.08175178328624e-05, "loss": 0.0433, "step": 16210 }, { "epoch": 11.694304253785148, "grad_norm": 0.11679201573133469, "learning_rate": 8.079146986893284e-05, "loss": 0.0331, "step": 16220 }, { "epoch": 11.701514059120404, "grad_norm": 0.20069588720798492, "learning_rate": 8.076540843576826e-05, "loss": 0.0354, "step": 16230 }, { "epoch": 11.70872386445566, "grad_norm": 0.11789727956056595, "learning_rate": 8.073933354476879e-05, "loss": 0.0377, "step": 16240 }, { "epoch": 11.715933669790916, "grad_norm": 0.12795759737491608, "learning_rate": 8.07132452073405e-05, "loss": 0.0369, "step": 16250 }, { "epoch": 11.723143475126172, "grad_norm": 0.16105638444423676, "learning_rate": 8.068714343489529e-05, "loss": 0.0327, "step": 16260 }, { "epoch": 11.730353280461427, "grad_norm": 0.16412490606307983, "learning_rate": 8.066102823885097e-05, "loss": 0.0391, "step": 16270 }, { "epoch": 11.737563085796683, "grad_norm": 0.14397285878658295, "learning_rate": 8.063489963063125e-05, "loss": 0.0364, "step": 16280 }, { "epoch": 11.74477289113194, "grad_norm": 0.13800278306007385, "learning_rate": 8.06087576216656e-05, "loss": 0.0404, "step": 16290 }, { "epoch": 11.751982696467195, "grad_norm": 0.14140702784061432, "learning_rate": 8.058260222338946e-05, "loss": 0.0374, "step": 16300 }, { "epoch": 11.75919250180245, "grad_norm": 0.15658576786518097, "learning_rate": 8.05564334472441e-05, "loss": 0.0349, "step": 16310 }, { "epoch": 11.766402307137707, "grad_norm": 0.11492084711790085, "learning_rate": 8.053025130467664e-05, "loss": 0.0365, "step": 16320 }, { "epoch": 11.773612112472962, "grad_norm": 0.12817536294460297, "learning_rate": 8.050405580714e-05, "loss": 0.0343, "step": 16330 }, { "epoch": 11.780821917808218, "grad_norm": 0.1583796739578247, "learning_rate": 8.047784696609301e-05, "loss": 0.0409, "step": 16340 }, { "epoch": 11.788031723143476, "grad_norm": 0.151544451713562, "learning_rate": 8.045162479300029e-05, "loss": 0.0296, "step": 16350 }, { "epoch": 11.795241528478732, "grad_norm": 0.15437810122966766, "learning_rate": 8.042538929933232e-05, "loss": 0.038, "step": 16360 }, { "epoch": 11.802451333813988, "grad_norm": 0.11889104545116425, "learning_rate": 8.039914049656539e-05, "loss": 0.0398, "step": 16370 }, { "epoch": 11.809661139149243, "grad_norm": 0.1601543128490448, "learning_rate": 8.037287839618163e-05, "loss": 0.0354, "step": 16380 }, { "epoch": 11.8168709444845, "grad_norm": 0.1370299607515335, "learning_rate": 8.034660300966898e-05, "loss": 0.0429, "step": 16390 }, { "epoch": 11.824080749819755, "grad_norm": 0.1599522829055786, "learning_rate": 8.032031434852115e-05, "loss": 0.0349, "step": 16400 }, { "epoch": 11.831290555155011, "grad_norm": 0.1428803950548172, "learning_rate": 8.029401242423773e-05, "loss": 0.0322, "step": 16410 }, { "epoch": 11.838500360490267, "grad_norm": 0.14353398978710175, "learning_rate": 8.026769724832406e-05, "loss": 0.0365, "step": 16420 }, { "epoch": 11.845710165825523, "grad_norm": 0.12324049323797226, "learning_rate": 8.02413688322913e-05, "loss": 0.0339, "step": 16430 }, { "epoch": 11.852919971160778, "grad_norm": 0.14091862738132477, "learning_rate": 8.021502718765639e-05, "loss": 0.0352, "step": 16440 }, { "epoch": 11.860129776496034, "grad_norm": 0.13892973959445953, "learning_rate": 8.018867232594204e-05, "loss": 0.0344, "step": 16450 }, { "epoch": 11.86733958183129, "grad_norm": 0.1263512820005417, "learning_rate": 8.016230425867679e-05, "loss": 0.0329, "step": 16460 }, { "epoch": 11.874549387166546, "grad_norm": 0.14042533934116364, "learning_rate": 8.013592299739494e-05, "loss": 0.0367, "step": 16470 }, { "epoch": 11.881759192501802, "grad_norm": 0.1632772982120514, "learning_rate": 8.01095285536365e-05, "loss": 0.0376, "step": 16480 }, { "epoch": 11.888968997837058, "grad_norm": 0.1676589846611023, "learning_rate": 8.008312093894735e-05, "loss": 0.0396, "step": 16490 }, { "epoch": 11.896178803172315, "grad_norm": 0.1318831592798233, "learning_rate": 8.005670016487903e-05, "loss": 0.038, "step": 16500 }, { "epoch": 11.903388608507571, "grad_norm": 0.09972577542066574, "learning_rate": 8.003026624298892e-05, "loss": 0.0301, "step": 16510 }, { "epoch": 11.910598413842827, "grad_norm": 0.14044390618801117, "learning_rate": 8.00038191848401e-05, "loss": 0.0353, "step": 16520 }, { "epoch": 11.917808219178083, "grad_norm": 0.13069504499435425, "learning_rate": 7.997735900200142e-05, "loss": 0.0313, "step": 16530 }, { "epoch": 11.925018024513339, "grad_norm": 0.10667802393436432, "learning_rate": 7.995088570604745e-05, "loss": 0.0301, "step": 16540 }, { "epoch": 11.932227829848594, "grad_norm": 0.16563335061073303, "learning_rate": 7.992439930855852e-05, "loss": 0.0405, "step": 16550 }, { "epoch": 11.93943763518385, "grad_norm": 0.10516338050365448, "learning_rate": 7.989789982112067e-05, "loss": 0.0353, "step": 16560 }, { "epoch": 11.946647440519106, "grad_norm": 0.1873895525932312, "learning_rate": 7.987138725532572e-05, "loss": 0.0384, "step": 16570 }, { "epoch": 11.953857245854362, "grad_norm": 0.13807986676692963, "learning_rate": 7.98448616227711e-05, "loss": 0.0348, "step": 16580 }, { "epoch": 11.961067051189618, "grad_norm": 0.17627866566181183, "learning_rate": 7.981832293506004e-05, "loss": 0.0423, "step": 16590 }, { "epoch": 11.968276856524874, "grad_norm": 0.15679949522018433, "learning_rate": 7.97917712038015e-05, "loss": 0.0308, "step": 16600 }, { "epoch": 11.97548666186013, "grad_norm": 0.1283252239227295, "learning_rate": 7.976520644061007e-05, "loss": 0.0389, "step": 16610 }, { "epoch": 11.982696467195385, "grad_norm": 0.13392725586891174, "learning_rate": 7.973862865710611e-05, "loss": 0.0343, "step": 16620 }, { "epoch": 11.989906272530641, "grad_norm": 0.12121523916721344, "learning_rate": 7.971203786491563e-05, "loss": 0.039, "step": 16630 }, { "epoch": 11.997116077865897, "grad_norm": 0.14150603115558624, "learning_rate": 7.968543407567034e-05, "loss": 0.0346, "step": 16640 }, { "epoch": 12.004325883201153, "grad_norm": 0.13877704739570618, "learning_rate": 7.965881730100765e-05, "loss": 0.0341, "step": 16650 }, { "epoch": 12.011535688536409, "grad_norm": 0.1404356211423874, "learning_rate": 7.963218755257064e-05, "loss": 0.0355, "step": 16660 }, { "epoch": 12.018745493871666, "grad_norm": 0.15688437223434448, "learning_rate": 7.960554484200807e-05, "loss": 0.0367, "step": 16670 }, { "epoch": 12.025955299206922, "grad_norm": 0.18452294170856476, "learning_rate": 7.957888918097436e-05, "loss": 0.0385, "step": 16680 }, { "epoch": 12.033165104542178, "grad_norm": 0.14694596827030182, "learning_rate": 7.955222058112962e-05, "loss": 0.035, "step": 16690 }, { "epoch": 12.040374909877434, "grad_norm": 0.14220112562179565, "learning_rate": 7.952553905413955e-05, "loss": 0.0349, "step": 16700 }, { "epoch": 12.04758471521269, "grad_norm": 0.14828713238239288, "learning_rate": 7.949884461167563e-05, "loss": 0.0389, "step": 16710 }, { "epoch": 12.054794520547945, "grad_norm": 0.0985971987247467, "learning_rate": 7.947213726541484e-05, "loss": 0.0293, "step": 16720 }, { "epoch": 12.062004325883201, "grad_norm": 0.15746624767780304, "learning_rate": 7.944541702703996e-05, "loss": 0.0419, "step": 16730 }, { "epoch": 12.069214131218457, "grad_norm": 0.12817886471748352, "learning_rate": 7.941868390823928e-05, "loss": 0.033, "step": 16740 }, { "epoch": 12.076423936553713, "grad_norm": 0.1737419217824936, "learning_rate": 7.939193792070677e-05, "loss": 0.0325, "step": 16750 }, { "epoch": 12.083633741888969, "grad_norm": 0.1706368625164032, "learning_rate": 7.936517907614207e-05, "loss": 0.0367, "step": 16760 }, { "epoch": 12.090843547224225, "grad_norm": 0.17434319853782654, "learning_rate": 7.933840738625036e-05, "loss": 0.04, "step": 16770 }, { "epoch": 12.09805335255948, "grad_norm": 0.15958380699157715, "learning_rate": 7.931162286274252e-05, "loss": 0.0345, "step": 16780 }, { "epoch": 12.105263157894736, "grad_norm": 0.1276158094406128, "learning_rate": 7.928482551733503e-05, "loss": 0.0332, "step": 16790 }, { "epoch": 12.112472963229992, "grad_norm": 0.16435274481773376, "learning_rate": 7.925801536174991e-05, "loss": 0.036, "step": 16800 }, { "epoch": 12.119682768565248, "grad_norm": 0.11198896169662476, "learning_rate": 7.923119240771484e-05, "loss": 0.035, "step": 16810 }, { "epoch": 12.126892573900506, "grad_norm": 0.17399032413959503, "learning_rate": 7.920435666696313e-05, "loss": 0.0428, "step": 16820 }, { "epoch": 12.134102379235761, "grad_norm": 0.10364747047424316, "learning_rate": 7.917750815123362e-05, "loss": 0.0321, "step": 16830 }, { "epoch": 12.141312184571017, "grad_norm": 0.12825661897659302, "learning_rate": 7.915064687227075e-05, "loss": 0.0356, "step": 16840 }, { "epoch": 12.148521989906273, "grad_norm": 0.11236399412155151, "learning_rate": 7.912377284182458e-05, "loss": 0.0351, "step": 16850 }, { "epoch": 12.155731795241529, "grad_norm": 0.16631808876991272, "learning_rate": 7.90968860716507e-05, "loss": 0.0376, "step": 16860 }, { "epoch": 12.162941600576785, "grad_norm": 0.1128697469830513, "learning_rate": 7.906998657351033e-05, "loss": 0.0339, "step": 16870 }, { "epoch": 12.17015140591204, "grad_norm": 0.12023729085922241, "learning_rate": 7.90430743591702e-05, "loss": 0.0372, "step": 16880 }, { "epoch": 12.177361211247296, "grad_norm": 0.18078583478927612, "learning_rate": 7.901614944040263e-05, "loss": 0.0418, "step": 16890 }, { "epoch": 12.184571016582552, "grad_norm": 0.14293135702610016, "learning_rate": 7.89892118289855e-05, "loss": 0.0376, "step": 16900 }, { "epoch": 12.191780821917808, "grad_norm": 0.1671091914176941, "learning_rate": 7.896226153670223e-05, "loss": 0.0406, "step": 16910 }, { "epoch": 12.198990627253064, "grad_norm": 0.14832739531993866, "learning_rate": 7.89352985753418e-05, "loss": 0.0353, "step": 16920 }, { "epoch": 12.20620043258832, "grad_norm": 0.14894452691078186, "learning_rate": 7.890832295669871e-05, "loss": 0.0355, "step": 16930 }, { "epoch": 12.213410237923576, "grad_norm": 0.14723920822143555, "learning_rate": 7.888133469257303e-05, "loss": 0.0324, "step": 16940 }, { "epoch": 12.220620043258831, "grad_norm": 0.13655920326709747, "learning_rate": 7.885433379477034e-05, "loss": 0.0368, "step": 16950 }, { "epoch": 12.227829848594087, "grad_norm": 0.1271180361509323, "learning_rate": 7.882732027510174e-05, "loss": 0.0324, "step": 16960 }, { "epoch": 12.235039653929343, "grad_norm": 0.1159188374876976, "learning_rate": 7.880029414538386e-05, "loss": 0.0388, "step": 16970 }, { "epoch": 12.242249459264599, "grad_norm": 0.12177480012178421, "learning_rate": 7.877325541743888e-05, "loss": 0.0369, "step": 16980 }, { "epoch": 12.249459264599857, "grad_norm": 0.14129135012626648, "learning_rate": 7.874620410309443e-05, "loss": 0.031, "step": 16990 }, { "epoch": 12.256669069935112, "grad_norm": 0.0996580645442009, "learning_rate": 7.871914021418368e-05, "loss": 0.0351, "step": 17000 }, { "epoch": 12.263878875270368, "grad_norm": 0.18495607376098633, "learning_rate": 7.869206376254528e-05, "loss": 0.0393, "step": 17010 }, { "epoch": 12.271088680605624, "grad_norm": 0.18275022506713867, "learning_rate": 7.86649747600234e-05, "loss": 0.0412, "step": 17020 }, { "epoch": 12.27829848594088, "grad_norm": 0.1319437325000763, "learning_rate": 7.86378732184677e-05, "loss": 0.0311, "step": 17030 }, { "epoch": 12.285508291276136, "grad_norm": 0.10706453770399094, "learning_rate": 7.86107591497333e-05, "loss": 0.0334, "step": 17040 }, { "epoch": 12.292718096611392, "grad_norm": 0.1715465486049652, "learning_rate": 7.858363256568083e-05, "loss": 0.0351, "step": 17050 }, { "epoch": 12.299927901946647, "grad_norm": 0.21142424643039703, "learning_rate": 7.855649347817637e-05, "loss": 0.0374, "step": 17060 }, { "epoch": 12.307137707281903, "grad_norm": 0.18402256071567535, "learning_rate": 7.85293418990915e-05, "loss": 0.0354, "step": 17070 }, { "epoch": 12.314347512617159, "grad_norm": 0.12405744940042496, "learning_rate": 7.85021778403032e-05, "loss": 0.0436, "step": 17080 }, { "epoch": 12.321557317952415, "grad_norm": 0.10648185014724731, "learning_rate": 7.847500131369397e-05, "loss": 0.042, "step": 17090 }, { "epoch": 12.32876712328767, "grad_norm": 0.18330736458301544, "learning_rate": 7.844781233115176e-05, "loss": 0.0336, "step": 17100 }, { "epoch": 12.335976928622927, "grad_norm": 0.13939359784126282, "learning_rate": 7.842061090456996e-05, "loss": 0.0355, "step": 17110 }, { "epoch": 12.343186733958182, "grad_norm": 0.1322089582681656, "learning_rate": 7.839339704584736e-05, "loss": 0.0359, "step": 17120 }, { "epoch": 12.350396539293438, "grad_norm": 0.1603846400976181, "learning_rate": 7.836617076688828e-05, "loss": 0.0376, "step": 17130 }, { "epoch": 12.357606344628696, "grad_norm": 0.2268206775188446, "learning_rate": 7.833893207960236e-05, "loss": 0.0486, "step": 17140 }, { "epoch": 12.364816149963952, "grad_norm": 0.2621496319770813, "learning_rate": 7.831168099590478e-05, "loss": 0.037, "step": 17150 }, { "epoch": 12.372025955299208, "grad_norm": 0.13571788370609283, "learning_rate": 7.828441752771607e-05, "loss": 0.036, "step": 17160 }, { "epoch": 12.379235760634463, "grad_norm": 0.14275553822517395, "learning_rate": 7.825714168696219e-05, "loss": 0.0328, "step": 17170 }, { "epoch": 12.38644556596972, "grad_norm": 0.131187304854393, "learning_rate": 7.82298534855745e-05, "loss": 0.0337, "step": 17180 }, { "epoch": 12.393655371304975, "grad_norm": 0.1536608189344406, "learning_rate": 7.820255293548984e-05, "loss": 0.033, "step": 17190 }, { "epoch": 12.400865176640231, "grad_norm": 0.1407230794429779, "learning_rate": 7.817524004865035e-05, "loss": 0.0327, "step": 17200 }, { "epoch": 12.408074981975487, "grad_norm": 0.13717405498027802, "learning_rate": 7.814791483700366e-05, "loss": 0.0387, "step": 17210 }, { "epoch": 12.415284787310743, "grad_norm": 0.2139778882265091, "learning_rate": 7.81205773125027e-05, "loss": 0.0369, "step": 17220 }, { "epoch": 12.422494592645998, "grad_norm": 0.1636853665113449, "learning_rate": 7.809322748710585e-05, "loss": 0.0337, "step": 17230 }, { "epoch": 12.429704397981254, "grad_norm": 0.13988322019577026, "learning_rate": 7.806586537277685e-05, "loss": 0.035, "step": 17240 }, { "epoch": 12.43691420331651, "grad_norm": 0.18771743774414062, "learning_rate": 7.803849098148484e-05, "loss": 0.0428, "step": 17250 }, { "epoch": 12.444124008651766, "grad_norm": 0.11614210903644562, "learning_rate": 7.801110432520427e-05, "loss": 0.0318, "step": 17260 }, { "epoch": 12.451333813987022, "grad_norm": 0.18865787982940674, "learning_rate": 7.798370541591504e-05, "loss": 0.0375, "step": 17270 }, { "epoch": 12.458543619322278, "grad_norm": 0.15657725930213928, "learning_rate": 7.795629426560232e-05, "loss": 0.0376, "step": 17280 }, { "epoch": 12.465753424657533, "grad_norm": 0.14690130949020386, "learning_rate": 7.792887088625671e-05, "loss": 0.042, "step": 17290 }, { "epoch": 12.47296322999279, "grad_norm": 0.1873258799314499, "learning_rate": 7.790143528987412e-05, "loss": 0.0342, "step": 17300 }, { "epoch": 12.480173035328047, "grad_norm": 0.1255352944135666, "learning_rate": 7.787398748845578e-05, "loss": 0.0354, "step": 17310 }, { "epoch": 12.487382840663303, "grad_norm": 0.16975785791873932, "learning_rate": 7.784652749400834e-05, "loss": 0.037, "step": 17320 }, { "epoch": 12.494592645998559, "grad_norm": 0.1075579896569252, "learning_rate": 7.78190553185437e-05, "loss": 0.0302, "step": 17330 }, { "epoch": 12.501802451333814, "grad_norm": 0.11939030140638351, "learning_rate": 7.779157097407915e-05, "loss": 0.0397, "step": 17340 }, { "epoch": 12.50901225666907, "grad_norm": 0.18510498106479645, "learning_rate": 7.776407447263725e-05, "loss": 0.0409, "step": 17350 }, { "epoch": 12.516222062004326, "grad_norm": 0.1304885894060135, "learning_rate": 7.773656582624593e-05, "loss": 0.0418, "step": 17360 }, { "epoch": 12.523431867339582, "grad_norm": 0.15163776278495789, "learning_rate": 7.770904504693837e-05, "loss": 0.0342, "step": 17370 }, { "epoch": 12.530641672674838, "grad_norm": 0.14486177265644073, "learning_rate": 7.768151214675314e-05, "loss": 0.0325, "step": 17380 }, { "epoch": 12.537851478010094, "grad_norm": 0.10368482768535614, "learning_rate": 7.765396713773402e-05, "loss": 0.0312, "step": 17390 }, { "epoch": 12.54506128334535, "grad_norm": 0.17289309203624725, "learning_rate": 7.762641003193017e-05, "loss": 0.0307, "step": 17400 }, { "epoch": 12.552271088680605, "grad_norm": 0.13676056265830994, "learning_rate": 7.759884084139597e-05, "loss": 0.0336, "step": 17410 }, { "epoch": 12.559480894015861, "grad_norm": 0.12069832533597946, "learning_rate": 7.757125957819116e-05, "loss": 0.0343, "step": 17420 }, { "epoch": 12.566690699351117, "grad_norm": 0.16983741521835327, "learning_rate": 7.754366625438069e-05, "loss": 0.0378, "step": 17430 }, { "epoch": 12.573900504686373, "grad_norm": 0.10796020179986954, "learning_rate": 7.751606088203483e-05, "loss": 0.0381, "step": 17440 }, { "epoch": 12.581110310021629, "grad_norm": 0.11690344661474228, "learning_rate": 7.74884434732291e-05, "loss": 0.0317, "step": 17450 }, { "epoch": 12.588320115356886, "grad_norm": 0.1749570369720459, "learning_rate": 7.746081404004431e-05, "loss": 0.0369, "step": 17460 }, { "epoch": 12.595529920692142, "grad_norm": 0.12512898445129395, "learning_rate": 7.743317259456649e-05, "loss": 0.035, "step": 17470 }, { "epoch": 12.602739726027398, "grad_norm": 0.12278465181589127, "learning_rate": 7.740551914888696e-05, "loss": 0.0341, "step": 17480 }, { "epoch": 12.609949531362654, "grad_norm": 0.14313672482967377, "learning_rate": 7.737785371510229e-05, "loss": 0.0419, "step": 17490 }, { "epoch": 12.61715933669791, "grad_norm": 0.11129703372716904, "learning_rate": 7.735017630531426e-05, "loss": 0.0318, "step": 17500 }, { "epoch": 12.624369142033165, "grad_norm": 0.11583685874938965, "learning_rate": 7.732248693162991e-05, "loss": 0.028, "step": 17510 }, { "epoch": 12.631578947368421, "grad_norm": 0.12257280945777893, "learning_rate": 7.729478560616153e-05, "loss": 0.0341, "step": 17520 }, { "epoch": 12.638788752703677, "grad_norm": 0.11482755839824677, "learning_rate": 7.726707234102659e-05, "loss": 0.0345, "step": 17530 }, { "epoch": 12.645998558038933, "grad_norm": 0.14678595960140228, "learning_rate": 7.723934714834785e-05, "loss": 0.037, "step": 17540 }, { "epoch": 12.653208363374189, "grad_norm": 0.17989835143089294, "learning_rate": 7.721161004025323e-05, "loss": 0.0442, "step": 17550 }, { "epoch": 12.660418168709445, "grad_norm": 0.20005278289318085, "learning_rate": 7.718386102887588e-05, "loss": 0.0346, "step": 17560 }, { "epoch": 12.6676279740447, "grad_norm": 0.14067265391349792, "learning_rate": 7.715610012635418e-05, "loss": 0.0318, "step": 17570 }, { "epoch": 12.674837779379956, "grad_norm": 0.1447896659374237, "learning_rate": 7.712832734483166e-05, "loss": 0.039, "step": 17580 }, { "epoch": 12.682047584715212, "grad_norm": 0.11321870982646942, "learning_rate": 7.710054269645713e-05, "loss": 0.0383, "step": 17590 }, { "epoch": 12.689257390050468, "grad_norm": 0.14936189353466034, "learning_rate": 7.707274619338449e-05, "loss": 0.033, "step": 17600 }, { "epoch": 12.696467195385724, "grad_norm": 0.1310126930475235, "learning_rate": 7.704493784777289e-05, "loss": 0.0317, "step": 17610 }, { "epoch": 12.70367700072098, "grad_norm": 0.13935674726963043, "learning_rate": 7.701711767178665e-05, "loss": 0.0349, "step": 17620 }, { "epoch": 12.710886806056237, "grad_norm": 0.10630200803279877, "learning_rate": 7.698928567759525e-05, "loss": 0.0318, "step": 17630 }, { "epoch": 12.718096611391493, "grad_norm": 0.14092056453227997, "learning_rate": 7.696144187737335e-05, "loss": 0.0361, "step": 17640 }, { "epoch": 12.725306416726749, "grad_norm": 0.13530296087265015, "learning_rate": 7.69335862833008e-05, "loss": 0.039, "step": 17650 }, { "epoch": 12.732516222062005, "grad_norm": 0.16132999956607819, "learning_rate": 7.690571890756256e-05, "loss": 0.035, "step": 17660 }, { "epoch": 12.73972602739726, "grad_norm": 0.1312439888715744, "learning_rate": 7.687783976234877e-05, "loss": 0.0376, "step": 17670 }, { "epoch": 12.746935832732516, "grad_norm": 0.09077254682779312, "learning_rate": 7.684994885985472e-05, "loss": 0.0419, "step": 17680 }, { "epoch": 12.754145638067772, "grad_norm": 0.12631534039974213, "learning_rate": 7.682204621228083e-05, "loss": 0.0347, "step": 17690 }, { "epoch": 12.761355443403028, "grad_norm": 0.16382178664207458, "learning_rate": 7.679413183183267e-05, "loss": 0.0321, "step": 17700 }, { "epoch": 12.768565248738284, "grad_norm": 0.1533634513616562, "learning_rate": 7.676620573072093e-05, "loss": 0.0389, "step": 17710 }, { "epoch": 12.77577505407354, "grad_norm": 0.1416155993938446, "learning_rate": 7.673826792116145e-05, "loss": 0.0321, "step": 17720 }, { "epoch": 12.782984859408796, "grad_norm": 0.1852593719959259, "learning_rate": 7.671031841537519e-05, "loss": 0.0355, "step": 17730 }, { "epoch": 12.790194664744051, "grad_norm": 0.1387527734041214, "learning_rate": 7.668235722558819e-05, "loss": 0.0352, "step": 17740 }, { "epoch": 12.797404470079307, "grad_norm": 0.16635634005069733, "learning_rate": 7.665438436403161e-05, "loss": 0.0344, "step": 17750 }, { "epoch": 12.804614275414563, "grad_norm": 0.1493126004934311, "learning_rate": 7.662639984294178e-05, "loss": 0.0292, "step": 17760 }, { "epoch": 12.811824080749819, "grad_norm": 0.17671501636505127, "learning_rate": 7.659840367456002e-05, "loss": 0.0419, "step": 17770 }, { "epoch": 12.819033886085077, "grad_norm": 0.11586099117994308, "learning_rate": 7.657039587113287e-05, "loss": 0.0333, "step": 17780 }, { "epoch": 12.826243691420332, "grad_norm": 0.11791113764047623, "learning_rate": 7.654237644491182e-05, "loss": 0.0407, "step": 17790 }, { "epoch": 12.833453496755588, "grad_norm": 0.13120238482952118, "learning_rate": 7.651434540815358e-05, "loss": 0.0342, "step": 17800 }, { "epoch": 12.840663302090844, "grad_norm": 0.1658383011817932, "learning_rate": 7.648630277311986e-05, "loss": 0.0394, "step": 17810 }, { "epoch": 12.8478731074261, "grad_norm": 0.11964215338230133, "learning_rate": 7.645824855207744e-05, "loss": 0.036, "step": 17820 }, { "epoch": 12.855082912761356, "grad_norm": 0.08691275864839554, "learning_rate": 7.643018275729821e-05, "loss": 0.0353, "step": 17830 }, { "epoch": 12.862292718096612, "grad_norm": 0.1250937432050705, "learning_rate": 7.640210540105911e-05, "loss": 0.0353, "step": 17840 }, { "epoch": 12.869502523431867, "grad_norm": 0.11645068228244781, "learning_rate": 7.637401649564213e-05, "loss": 0.0377, "step": 17850 }, { "epoch": 12.876712328767123, "grad_norm": 0.15254004299640656, "learning_rate": 7.63459160533343e-05, "loss": 0.0448, "step": 17860 }, { "epoch": 12.883922134102379, "grad_norm": 0.14732249081134796, "learning_rate": 7.631780408642769e-05, "loss": 0.0334, "step": 17870 }, { "epoch": 12.891131939437635, "grad_norm": 0.1397245228290558, "learning_rate": 7.628968060721946e-05, "loss": 0.0317, "step": 17880 }, { "epoch": 12.89834174477289, "grad_norm": 0.1755395084619522, "learning_rate": 7.626154562801176e-05, "loss": 0.037, "step": 17890 }, { "epoch": 12.905551550108147, "grad_norm": 0.1376183032989502, "learning_rate": 7.623339916111177e-05, "loss": 0.0326, "step": 17900 }, { "epoch": 12.912761355443402, "grad_norm": 0.14072419703006744, "learning_rate": 7.620524121883174e-05, "loss": 0.0355, "step": 17910 }, { "epoch": 12.919971160778658, "grad_norm": 0.12743675708770752, "learning_rate": 7.617707181348892e-05, "loss": 0.0316, "step": 17920 }, { "epoch": 12.927180966113914, "grad_norm": 0.15700766444206238, "learning_rate": 7.614889095740554e-05, "loss": 0.0375, "step": 17930 }, { "epoch": 12.93439077144917, "grad_norm": 0.1311388462781906, "learning_rate": 7.612069866290885e-05, "loss": 0.0324, "step": 17940 }, { "epoch": 12.941600576784428, "grad_norm": 0.17765113711357117, "learning_rate": 7.609249494233115e-05, "loss": 0.0367, "step": 17950 }, { "epoch": 12.948810382119683, "grad_norm": 0.09684959799051285, "learning_rate": 7.60642798080097e-05, "loss": 0.032, "step": 17960 }, { "epoch": 12.95602018745494, "grad_norm": 0.15974058210849762, "learning_rate": 7.603605327228673e-05, "loss": 0.0377, "step": 17970 }, { "epoch": 12.963229992790195, "grad_norm": 0.10899148136377335, "learning_rate": 7.600781534750953e-05, "loss": 0.0373, "step": 17980 }, { "epoch": 12.970439798125451, "grad_norm": 0.1450553834438324, "learning_rate": 7.597956604603029e-05, "loss": 0.0351, "step": 17990 }, { "epoch": 12.977649603460707, "grad_norm": 0.15258538722991943, "learning_rate": 7.595130538020622e-05, "loss": 0.0366, "step": 18000 }, { "epoch": 12.984859408795963, "grad_norm": 0.16799362003803253, "learning_rate": 7.592303336239952e-05, "loss": 0.0399, "step": 18010 }, { "epoch": 12.992069214131218, "grad_norm": 0.14839377999305725, "learning_rate": 7.589475000497733e-05, "loss": 0.0392, "step": 18020 }, { "epoch": 12.999279019466474, "grad_norm": 0.1865444928407669, "learning_rate": 7.586645532031171e-05, "loss": 0.0378, "step": 18030 }, { "epoch": 13.00648882480173, "grad_norm": 0.20700907707214355, "learning_rate": 7.583814932077975e-05, "loss": 0.0374, "step": 18040 }, { "epoch": 13.013698630136986, "grad_norm": 0.1356549710035324, "learning_rate": 7.580983201876343e-05, "loss": 0.0375, "step": 18050 }, { "epoch": 13.020908435472242, "grad_norm": 0.10501468181610107, "learning_rate": 7.578150342664973e-05, "loss": 0.0389, "step": 18060 }, { "epoch": 13.028118240807498, "grad_norm": 0.13086079061031342, "learning_rate": 7.575316355683055e-05, "loss": 0.0345, "step": 18070 }, { "epoch": 13.035328046142753, "grad_norm": 0.13619942963123322, "learning_rate": 7.572481242170266e-05, "loss": 0.0351, "step": 18080 }, { "epoch": 13.04253785147801, "grad_norm": 0.10707290470600128, "learning_rate": 7.569645003366786e-05, "loss": 0.0374, "step": 18090 }, { "epoch": 13.049747656813267, "grad_norm": 0.12342571467161179, "learning_rate": 7.566807640513278e-05, "loss": 0.0302, "step": 18100 }, { "epoch": 13.056957462148523, "grad_norm": 0.2201484590768814, "learning_rate": 7.563969154850905e-05, "loss": 0.0364, "step": 18110 }, { "epoch": 13.064167267483779, "grad_norm": 0.1251773238182068, "learning_rate": 7.561129547621314e-05, "loss": 0.0336, "step": 18120 }, { "epoch": 13.071377072819034, "grad_norm": 0.11644922941923141, "learning_rate": 7.558288820066648e-05, "loss": 0.0332, "step": 18130 }, { "epoch": 13.07858687815429, "grad_norm": 0.1383868008852005, "learning_rate": 7.555446973429534e-05, "loss": 0.0279, "step": 18140 }, { "epoch": 13.085796683489546, "grad_norm": 0.19703719019889832, "learning_rate": 7.552604008953097e-05, "loss": 0.0426, "step": 18150 }, { "epoch": 13.093006488824802, "grad_norm": 0.1424456536769867, "learning_rate": 7.549759927880942e-05, "loss": 0.0279, "step": 18160 }, { "epoch": 13.100216294160058, "grad_norm": 0.2173936367034912, "learning_rate": 7.54691473145717e-05, "loss": 0.041, "step": 18170 }, { "epoch": 13.107426099495314, "grad_norm": 0.15311671793460846, "learning_rate": 7.544068420926365e-05, "loss": 0.0349, "step": 18180 }, { "epoch": 13.11463590483057, "grad_norm": 0.10733092576265335, "learning_rate": 7.5412209975336e-05, "loss": 0.0352, "step": 18190 }, { "epoch": 13.121845710165825, "grad_norm": 0.11136525869369507, "learning_rate": 7.538372462524435e-05, "loss": 0.0333, "step": 18200 }, { "epoch": 13.129055515501081, "grad_norm": 0.13570839166641235, "learning_rate": 7.535522817144915e-05, "loss": 0.0325, "step": 18210 }, { "epoch": 13.136265320836337, "grad_norm": 0.15569303929805756, "learning_rate": 7.532672062641575e-05, "loss": 0.0332, "step": 18220 }, { "epoch": 13.143475126171593, "grad_norm": 0.10160143673419952, "learning_rate": 7.529820200261427e-05, "loss": 0.0307, "step": 18230 }, { "epoch": 13.150684931506849, "grad_norm": 0.14172476530075073, "learning_rate": 7.526967231251977e-05, "loss": 0.0372, "step": 18240 }, { "epoch": 13.157894736842104, "grad_norm": 0.13096393644809723, "learning_rate": 7.524113156861206e-05, "loss": 0.0337, "step": 18250 }, { "epoch": 13.165104542177362, "grad_norm": 0.16616757214069366, "learning_rate": 7.521257978337587e-05, "loss": 0.0394, "step": 18260 }, { "epoch": 13.172314347512618, "grad_norm": 0.15215879678726196, "learning_rate": 7.518401696930072e-05, "loss": 0.0348, "step": 18270 }, { "epoch": 13.179524152847874, "grad_norm": 0.14877289533615112, "learning_rate": 7.515544313888092e-05, "loss": 0.0395, "step": 18280 }, { "epoch": 13.18673395818313, "grad_norm": 0.13260604441165924, "learning_rate": 7.512685830461568e-05, "loss": 0.0278, "step": 18290 }, { "epoch": 13.193943763518385, "grad_norm": 0.15297335386276245, "learning_rate": 7.509826247900892e-05, "loss": 0.0365, "step": 18300 }, { "epoch": 13.201153568853641, "grad_norm": 0.16090340912342072, "learning_rate": 7.50696556745695e-05, "loss": 0.0376, "step": 18310 }, { "epoch": 13.208363374188897, "grad_norm": 0.13508884608745575, "learning_rate": 7.504103790381095e-05, "loss": 0.0375, "step": 18320 }, { "epoch": 13.215573179524153, "grad_norm": 0.13608862459659576, "learning_rate": 7.501240917925167e-05, "loss": 0.029, "step": 18330 }, { "epoch": 13.222782984859409, "grad_norm": 0.14788153767585754, "learning_rate": 7.498376951341483e-05, "loss": 0.0365, "step": 18340 }, { "epoch": 13.229992790194665, "grad_norm": 0.12635429203510284, "learning_rate": 7.495511891882844e-05, "loss": 0.0325, "step": 18350 }, { "epoch": 13.23720259552992, "grad_norm": 0.1354197859764099, "learning_rate": 7.492645740802517e-05, "loss": 0.0328, "step": 18360 }, { "epoch": 13.244412400865176, "grad_norm": 0.11606433987617493, "learning_rate": 7.489778499354259e-05, "loss": 0.0348, "step": 18370 }, { "epoch": 13.251622206200432, "grad_norm": 0.09816557914018631, "learning_rate": 7.486910168792298e-05, "loss": 0.0361, "step": 18380 }, { "epoch": 13.258832011535688, "grad_norm": 0.11838022619485855, "learning_rate": 7.484040750371337e-05, "loss": 0.0371, "step": 18390 }, { "epoch": 13.266041816870944, "grad_norm": 0.1729596108198166, "learning_rate": 7.48117024534656e-05, "loss": 0.0378, "step": 18400 }, { "epoch": 13.2732516222062, "grad_norm": 0.1482478529214859, "learning_rate": 7.478298654973621e-05, "loss": 0.0401, "step": 18410 }, { "epoch": 13.280461427541457, "grad_norm": 0.13275474309921265, "learning_rate": 7.475425980508654e-05, "loss": 0.0362, "step": 18420 }, { "epoch": 13.287671232876713, "grad_norm": 0.1280287653207779, "learning_rate": 7.47255222320826e-05, "loss": 0.0331, "step": 18430 }, { "epoch": 13.294881038211969, "grad_norm": 0.13030284643173218, "learning_rate": 7.469677384329522e-05, "loss": 0.0314, "step": 18440 }, { "epoch": 13.302090843547225, "grad_norm": 0.12679484486579895, "learning_rate": 7.46680146512999e-05, "loss": 0.0369, "step": 18450 }, { "epoch": 13.30930064888248, "grad_norm": 0.13857018947601318, "learning_rate": 7.463924466867692e-05, "loss": 0.0343, "step": 18460 }, { "epoch": 13.316510454217736, "grad_norm": 0.198194682598114, "learning_rate": 7.461046390801121e-05, "loss": 0.0373, "step": 18470 }, { "epoch": 13.323720259552992, "grad_norm": 0.12128539383411407, "learning_rate": 7.458167238189248e-05, "loss": 0.035, "step": 18480 }, { "epoch": 13.330930064888248, "grad_norm": 0.17245356738567352, "learning_rate": 7.455287010291509e-05, "loss": 0.03, "step": 18490 }, { "epoch": 13.338139870223504, "grad_norm": 0.1373588591814041, "learning_rate": 7.452405708367818e-05, "loss": 0.0343, "step": 18500 }, { "epoch": 13.34534967555876, "grad_norm": 0.10586219280958176, "learning_rate": 7.449523333678549e-05, "loss": 0.0324, "step": 18510 }, { "epoch": 13.352559480894016, "grad_norm": 0.10082490742206573, "learning_rate": 7.446639887484555e-05, "loss": 0.0372, "step": 18520 }, { "epoch": 13.359769286229271, "grad_norm": 0.13439694046974182, "learning_rate": 7.443755371047152e-05, "loss": 0.0336, "step": 18530 }, { "epoch": 13.366979091564527, "grad_norm": 0.14732784032821655, "learning_rate": 7.440869785628124e-05, "loss": 0.0355, "step": 18540 }, { "epoch": 13.374188896899783, "grad_norm": 0.14290151000022888, "learning_rate": 7.437983132489727e-05, "loss": 0.0418, "step": 18550 }, { "epoch": 13.381398702235039, "grad_norm": 0.11371380090713501, "learning_rate": 7.435095412894678e-05, "loss": 0.03, "step": 18560 }, { "epoch": 13.388608507570295, "grad_norm": 0.14992652833461761, "learning_rate": 7.432206628106165e-05, "loss": 0.0403, "step": 18570 }, { "epoch": 13.395818312905552, "grad_norm": 0.14038081467151642, "learning_rate": 7.429316779387842e-05, "loss": 0.0333, "step": 18580 }, { "epoch": 13.403028118240808, "grad_norm": 0.1477205604314804, "learning_rate": 7.426425868003825e-05, "loss": 0.0341, "step": 18590 }, { "epoch": 13.410237923576064, "grad_norm": 0.17327439785003662, "learning_rate": 7.423533895218698e-05, "loss": 0.0369, "step": 18600 }, { "epoch": 13.41744772891132, "grad_norm": 0.20820176601409912, "learning_rate": 7.420640862297507e-05, "loss": 0.0375, "step": 18610 }, { "epoch": 13.424657534246576, "grad_norm": 0.12173148989677429, "learning_rate": 7.417746770505766e-05, "loss": 0.037, "step": 18620 }, { "epoch": 13.431867339581832, "grad_norm": 0.12236720323562622, "learning_rate": 7.414851621109444e-05, "loss": 0.0293, "step": 18630 }, { "epoch": 13.439077144917087, "grad_norm": 0.0979873314499855, "learning_rate": 7.411955415374982e-05, "loss": 0.0323, "step": 18640 }, { "epoch": 13.446286950252343, "grad_norm": 0.11166471242904663, "learning_rate": 7.409058154569278e-05, "loss": 0.0313, "step": 18650 }, { "epoch": 13.453496755587599, "grad_norm": 0.13623759150505066, "learning_rate": 7.406159839959689e-05, "loss": 0.035, "step": 18660 }, { "epoch": 13.460706560922855, "grad_norm": 0.12151531875133514, "learning_rate": 7.403260472814039e-05, "loss": 0.0324, "step": 18670 }, { "epoch": 13.46791636625811, "grad_norm": 0.18460361659526825, "learning_rate": 7.400360054400611e-05, "loss": 0.0453, "step": 18680 }, { "epoch": 13.475126171593367, "grad_norm": 0.14411267638206482, "learning_rate": 7.397458585988143e-05, "loss": 0.0351, "step": 18690 }, { "epoch": 13.482335976928622, "grad_norm": 0.15308749675750732, "learning_rate": 7.394556068845841e-05, "loss": 0.0325, "step": 18700 }, { "epoch": 13.489545782263878, "grad_norm": 0.17242370545864105, "learning_rate": 7.39165250424336e-05, "loss": 0.0354, "step": 18710 }, { "epoch": 13.496755587599134, "grad_norm": 0.12931516766548157, "learning_rate": 7.38874789345082e-05, "loss": 0.0347, "step": 18720 }, { "epoch": 13.50396539293439, "grad_norm": 0.13629184663295746, "learning_rate": 7.385842237738794e-05, "loss": 0.0359, "step": 18730 }, { "epoch": 13.511175198269648, "grad_norm": 0.22195838391780853, "learning_rate": 7.382935538378318e-05, "loss": 0.0337, "step": 18740 }, { "epoch": 13.518385003604903, "grad_norm": 0.0753406509757042, "learning_rate": 7.38002779664088e-05, "loss": 0.0332, "step": 18750 }, { "epoch": 13.52559480894016, "grad_norm": 0.14623776078224182, "learning_rate": 7.377119013798422e-05, "loss": 0.034, "step": 18760 }, { "epoch": 13.532804614275415, "grad_norm": 0.11014153808355331, "learning_rate": 7.374209191123349e-05, "loss": 0.0315, "step": 18770 }, { "epoch": 13.54001441961067, "grad_norm": 0.13911019265651703, "learning_rate": 7.371298329888512e-05, "loss": 0.0344, "step": 18780 }, { "epoch": 13.547224224945927, "grad_norm": 0.13031171262264252, "learning_rate": 7.368386431367223e-05, "loss": 0.0358, "step": 18790 }, { "epoch": 13.554434030281183, "grad_norm": 0.1735212355852127, "learning_rate": 7.365473496833246e-05, "loss": 0.0394, "step": 18800 }, { "epoch": 13.561643835616438, "grad_norm": 0.15669576823711395, "learning_rate": 7.362559527560797e-05, "loss": 0.0319, "step": 18810 }, { "epoch": 13.568853640951694, "grad_norm": 0.1827627271413803, "learning_rate": 7.359644524824545e-05, "loss": 0.035, "step": 18820 }, { "epoch": 13.57606344628695, "grad_norm": 0.1702619343996048, "learning_rate": 7.35672848989961e-05, "loss": 0.0326, "step": 18830 }, { "epoch": 13.583273251622206, "grad_norm": 0.1576993316411972, "learning_rate": 7.353811424061565e-05, "loss": 0.032, "step": 18840 }, { "epoch": 13.590483056957462, "grad_norm": 0.11049839109182358, "learning_rate": 7.350893328586435e-05, "loss": 0.0313, "step": 18850 }, { "epoch": 13.597692862292718, "grad_norm": 0.11988016217947006, "learning_rate": 7.347974204750696e-05, "loss": 0.0332, "step": 18860 }, { "epoch": 13.604902667627973, "grad_norm": 0.11945955455303192, "learning_rate": 7.345054053831266e-05, "loss": 0.037, "step": 18870 }, { "epoch": 13.61211247296323, "grad_norm": 0.184857577085495, "learning_rate": 7.342132877105523e-05, "loss": 0.0323, "step": 18880 }, { "epoch": 13.619322278298487, "grad_norm": 0.1439443975687027, "learning_rate": 7.339210675851288e-05, "loss": 0.0354, "step": 18890 }, { "epoch": 13.626532083633743, "grad_norm": 0.13758864998817444, "learning_rate": 7.336287451346831e-05, "loss": 0.0338, "step": 18900 }, { "epoch": 13.633741888968999, "grad_norm": 0.13146482408046722, "learning_rate": 7.333363204870868e-05, "loss": 0.0363, "step": 18910 }, { "epoch": 13.640951694304254, "grad_norm": 0.1158648207783699, "learning_rate": 7.330437937702566e-05, "loss": 0.0337, "step": 18920 }, { "epoch": 13.64816149963951, "grad_norm": 0.18389123678207397, "learning_rate": 7.327511651121534e-05, "loss": 0.0416, "step": 18930 }, { "epoch": 13.655371304974766, "grad_norm": 0.12986809015274048, "learning_rate": 7.324584346407833e-05, "loss": 0.034, "step": 18940 }, { "epoch": 13.662581110310022, "grad_norm": 0.11193780601024628, "learning_rate": 7.321656024841962e-05, "loss": 0.033, "step": 18950 }, { "epoch": 13.669790915645278, "grad_norm": 0.181573748588562, "learning_rate": 7.31872668770487e-05, "loss": 0.0314, "step": 18960 }, { "epoch": 13.677000720980534, "grad_norm": 0.1464328020811081, "learning_rate": 7.315796336277949e-05, "loss": 0.0392, "step": 18970 }, { "epoch": 13.68421052631579, "grad_norm": 0.14960245788097382, "learning_rate": 7.312864971843031e-05, "loss": 0.0304, "step": 18980 }, { "epoch": 13.691420331651045, "grad_norm": 0.10695874691009521, "learning_rate": 7.309932595682398e-05, "loss": 0.0322, "step": 18990 }, { "epoch": 13.698630136986301, "grad_norm": 0.13011623919010162, "learning_rate": 7.30699920907877e-05, "loss": 0.0292, "step": 19000 }, { "epoch": 13.705839942321557, "grad_norm": 0.1877172887325287, "learning_rate": 7.304064813315308e-05, "loss": 0.0325, "step": 19010 }, { "epoch": 13.713049747656813, "grad_norm": 0.11271163821220398, "learning_rate": 7.30112940967562e-05, "loss": 0.0314, "step": 19020 }, { "epoch": 13.720259552992069, "grad_norm": 0.184635192155838, "learning_rate": 7.298192999443747e-05, "loss": 0.0381, "step": 19030 }, { "epoch": 13.727469358327324, "grad_norm": 0.12854786217212677, "learning_rate": 7.295255583904179e-05, "loss": 0.0332, "step": 19040 }, { "epoch": 13.73467916366258, "grad_norm": 0.16094474494457245, "learning_rate": 7.29231716434184e-05, "loss": 0.0334, "step": 19050 }, { "epoch": 13.741888968997838, "grad_norm": 0.12176866084337234, "learning_rate": 7.289377742042092e-05, "loss": 0.0302, "step": 19060 }, { "epoch": 13.749098774333094, "grad_norm": 0.14175204932689667, "learning_rate": 7.286437318290741e-05, "loss": 0.0304, "step": 19070 }, { "epoch": 13.75630857966835, "grad_norm": 0.14082714915275574, "learning_rate": 7.283495894374027e-05, "loss": 0.0355, "step": 19080 }, { "epoch": 13.763518385003605, "grad_norm": 0.12579979002475739, "learning_rate": 7.280553471578629e-05, "loss": 0.0309, "step": 19090 }, { "epoch": 13.770728190338861, "grad_norm": 0.09919718652963638, "learning_rate": 7.277610051191663e-05, "loss": 0.0327, "step": 19100 }, { "epoch": 13.777937995674117, "grad_norm": 0.1592087745666504, "learning_rate": 7.27466563450068e-05, "loss": 0.037, "step": 19110 }, { "epoch": 13.785147801009373, "grad_norm": 0.1204904243350029, "learning_rate": 7.271720222793668e-05, "loss": 0.0312, "step": 19120 }, { "epoch": 13.792357606344629, "grad_norm": 0.1652964949607849, "learning_rate": 7.268773817359054e-05, "loss": 0.0369, "step": 19130 }, { "epoch": 13.799567411679885, "grad_norm": 0.12427201122045517, "learning_rate": 7.265826419485689e-05, "loss": 0.0333, "step": 19140 }, { "epoch": 13.80677721701514, "grad_norm": 0.1265077143907547, "learning_rate": 7.26287803046287e-05, "loss": 0.0289, "step": 19150 }, { "epoch": 13.813987022350396, "grad_norm": 0.20007628202438354, "learning_rate": 7.259928651580322e-05, "loss": 0.0398, "step": 19160 }, { "epoch": 13.821196827685652, "grad_norm": 0.12089507281780243, "learning_rate": 7.256978284128202e-05, "loss": 0.0342, "step": 19170 }, { "epoch": 13.828406633020908, "grad_norm": 0.19535070657730103, "learning_rate": 7.254026929397101e-05, "loss": 0.0367, "step": 19180 }, { "epoch": 13.835616438356164, "grad_norm": 0.13546693325042725, "learning_rate": 7.251074588678044e-05, "loss": 0.0337, "step": 19190 }, { "epoch": 13.84282624369142, "grad_norm": 0.13714949786663055, "learning_rate": 7.248121263262482e-05, "loss": 0.0322, "step": 19200 }, { "epoch": 13.850036049026677, "grad_norm": 0.14886946976184845, "learning_rate": 7.245166954442304e-05, "loss": 0.0318, "step": 19210 }, { "epoch": 13.857245854361933, "grad_norm": 0.16493459045886993, "learning_rate": 7.24221166350982e-05, "loss": 0.0341, "step": 19220 }, { "epoch": 13.864455659697189, "grad_norm": 0.16526442766189575, "learning_rate": 7.239255391757777e-05, "loss": 0.0339, "step": 19230 }, { "epoch": 13.871665465032445, "grad_norm": 0.17720085382461548, "learning_rate": 7.236298140479351e-05, "loss": 0.041, "step": 19240 }, { "epoch": 13.8788752703677, "grad_norm": 0.1392505019903183, "learning_rate": 7.233339910968141e-05, "loss": 0.0356, "step": 19250 }, { "epoch": 13.886085075702956, "grad_norm": 0.157710000872612, "learning_rate": 7.230380704518178e-05, "loss": 0.0389, "step": 19260 }, { "epoch": 13.893294881038212, "grad_norm": 0.11559721827507019, "learning_rate": 7.227420522423919e-05, "loss": 0.0333, "step": 19270 }, { "epoch": 13.900504686373468, "grad_norm": 0.11816000193357468, "learning_rate": 7.224459365980249e-05, "loss": 0.0337, "step": 19280 }, { "epoch": 13.907714491708724, "grad_norm": 0.18059343099594116, "learning_rate": 7.221497236482479e-05, "loss": 0.0382, "step": 19290 }, { "epoch": 13.91492429704398, "grad_norm": 0.1538889855146408, "learning_rate": 7.218534135226345e-05, "loss": 0.0369, "step": 19300 }, { "epoch": 13.922134102379236, "grad_norm": 0.13854368031024933, "learning_rate": 7.215570063508005e-05, "loss": 0.0321, "step": 19310 }, { "epoch": 13.929343907714491, "grad_norm": 0.15010693669319153, "learning_rate": 7.21260502262405e-05, "loss": 0.0341, "step": 19320 }, { "epoch": 13.936553713049747, "grad_norm": 0.13441435992717743, "learning_rate": 7.209639013871487e-05, "loss": 0.032, "step": 19330 }, { "epoch": 13.943763518385003, "grad_norm": 0.13096362352371216, "learning_rate": 7.206672038547746e-05, "loss": 0.0324, "step": 19340 }, { "epoch": 13.950973323720259, "grad_norm": 0.12972034513950348, "learning_rate": 7.203704097950687e-05, "loss": 0.0322, "step": 19350 }, { "epoch": 13.958183129055515, "grad_norm": 0.13502155244350433, "learning_rate": 7.200735193378587e-05, "loss": 0.034, "step": 19360 }, { "epoch": 13.96539293439077, "grad_norm": 0.17967812716960907, "learning_rate": 7.197765326130145e-05, "loss": 0.0316, "step": 19370 }, { "epoch": 13.972602739726028, "grad_norm": 0.12791888415813446, "learning_rate": 7.194794497504481e-05, "loss": 0.0312, "step": 19380 }, { "epoch": 13.979812545061284, "grad_norm": 0.11920975148677826, "learning_rate": 7.19182270880114e-05, "loss": 0.0315, "step": 19390 }, { "epoch": 13.98702235039654, "grad_norm": 0.10280419886112213, "learning_rate": 7.188849961320079e-05, "loss": 0.0354, "step": 19400 }, { "epoch": 13.994232155731796, "grad_norm": 0.14567764103412628, "learning_rate": 7.185876256361681e-05, "loss": 0.0354, "step": 19410 }, { "epoch": 14.001441961067052, "grad_norm": 0.14258936047554016, "learning_rate": 7.182901595226745e-05, "loss": 0.0414, "step": 19420 }, { "epoch": 14.008651766402307, "grad_norm": 0.15196536481380463, "learning_rate": 7.179925979216491e-05, "loss": 0.0337, "step": 19430 }, { "epoch": 14.015861571737563, "grad_norm": 0.12636640667915344, "learning_rate": 7.17694940963255e-05, "loss": 0.032, "step": 19440 }, { "epoch": 14.023071377072819, "grad_norm": 0.11349644511938095, "learning_rate": 7.17397188777698e-05, "loss": 0.036, "step": 19450 }, { "epoch": 14.030281182408075, "grad_norm": 0.1752118170261383, "learning_rate": 7.170993414952245e-05, "loss": 0.0346, "step": 19460 }, { "epoch": 14.03749098774333, "grad_norm": 0.14217253029346466, "learning_rate": 7.168013992461233e-05, "loss": 0.0347, "step": 19470 }, { "epoch": 14.044700793078587, "grad_norm": 0.14972063899040222, "learning_rate": 7.165033621607245e-05, "loss": 0.0381, "step": 19480 }, { "epoch": 14.051910598413842, "grad_norm": 0.15793746709823608, "learning_rate": 7.162052303693995e-05, "loss": 0.0402, "step": 19490 }, { "epoch": 14.059120403749098, "grad_norm": 0.14084492623806, "learning_rate": 7.159070040025614e-05, "loss": 0.0411, "step": 19500 }, { "epoch": 14.066330209084354, "grad_norm": 0.11112861335277557, "learning_rate": 7.156086831906646e-05, "loss": 0.0333, "step": 19510 }, { "epoch": 14.07354001441961, "grad_norm": 0.16384494304656982, "learning_rate": 7.153102680642044e-05, "loss": 0.0349, "step": 19520 }, { "epoch": 14.080749819754868, "grad_norm": 0.13704852759838104, "learning_rate": 7.150117587537182e-05, "loss": 0.0294, "step": 19530 }, { "epoch": 14.087959625090123, "grad_norm": 0.14611953496932983, "learning_rate": 7.147131553897838e-05, "loss": 0.0355, "step": 19540 }, { "epoch": 14.09516943042538, "grad_norm": 0.11591081321239471, "learning_rate": 7.144144581030205e-05, "loss": 0.0303, "step": 19550 }, { "epoch": 14.102379235760635, "grad_norm": 0.15486347675323486, "learning_rate": 7.141156670240889e-05, "loss": 0.0371, "step": 19560 }, { "epoch": 14.10958904109589, "grad_norm": 0.12954406440258026, "learning_rate": 7.138167822836899e-05, "loss": 0.0341, "step": 19570 }, { "epoch": 14.116798846431147, "grad_norm": 0.17390242218971252, "learning_rate": 7.135178040125661e-05, "loss": 0.0369, "step": 19580 }, { "epoch": 14.124008651766403, "grad_norm": 0.12131394445896149, "learning_rate": 7.13218732341501e-05, "loss": 0.0351, "step": 19590 }, { "epoch": 14.131218457101658, "grad_norm": 0.15682274103164673, "learning_rate": 7.129195674013181e-05, "loss": 0.0314, "step": 19600 }, { "epoch": 14.138428262436914, "grad_norm": 0.14517845213413239, "learning_rate": 7.126203093228829e-05, "loss": 0.0329, "step": 19610 }, { "epoch": 14.14563806777217, "grad_norm": 0.23151090741157532, "learning_rate": 7.123209582371006e-05, "loss": 0.0306, "step": 19620 }, { "epoch": 14.152847873107426, "grad_norm": 0.24419352412223816, "learning_rate": 7.120215142749179e-05, "loss": 0.0368, "step": 19630 }, { "epoch": 14.160057678442682, "grad_norm": 0.16721971333026886, "learning_rate": 7.117219775673218e-05, "loss": 0.033, "step": 19640 }, { "epoch": 14.167267483777938, "grad_norm": 0.10300347954034805, "learning_rate": 7.114223482453394e-05, "loss": 0.0299, "step": 19650 }, { "epoch": 14.174477289113193, "grad_norm": 0.13837413489818573, "learning_rate": 7.11122626440039e-05, "loss": 0.0327, "step": 19660 }, { "epoch": 14.18168709444845, "grad_norm": 0.13509418070316315, "learning_rate": 7.10822812282529e-05, "loss": 0.033, "step": 19670 }, { "epoch": 14.188896899783705, "grad_norm": 0.12304884940385818, "learning_rate": 7.105229059039586e-05, "loss": 0.0372, "step": 19680 }, { "epoch": 14.196106705118961, "grad_norm": 0.07134955376386642, "learning_rate": 7.102229074355168e-05, "loss": 0.0304, "step": 19690 }, { "epoch": 14.203316510454219, "grad_norm": 0.1690855324268341, "learning_rate": 7.099228170084332e-05, "loss": 0.0336, "step": 19700 }, { "epoch": 14.210526315789474, "grad_norm": 0.1574675738811493, "learning_rate": 7.096226347539771e-05, "loss": 0.0371, "step": 19710 }, { "epoch": 14.21773612112473, "grad_norm": 0.17842614650726318, "learning_rate": 7.093223608034592e-05, "loss": 0.0342, "step": 19720 }, { "epoch": 14.224945926459986, "grad_norm": 0.13694819808006287, "learning_rate": 7.090219952882291e-05, "loss": 0.0377, "step": 19730 }, { "epoch": 14.232155731795242, "grad_norm": 0.13469372689723969, "learning_rate": 7.087215383396768e-05, "loss": 0.0337, "step": 19740 }, { "epoch": 14.239365537130498, "grad_norm": 0.1480054408311844, "learning_rate": 7.084209900892325e-05, "loss": 0.0342, "step": 19750 }, { "epoch": 14.246575342465754, "grad_norm": 0.14006957411766052, "learning_rate": 7.081203506683663e-05, "loss": 0.0363, "step": 19760 }, { "epoch": 14.25378514780101, "grad_norm": 0.1494584083557129, "learning_rate": 7.078196202085879e-05, "loss": 0.0326, "step": 19770 }, { "epoch": 14.260994953136265, "grad_norm": 0.15077053010463715, "learning_rate": 7.075187988414473e-05, "loss": 0.0345, "step": 19780 }, { "epoch": 14.268204758471521, "grad_norm": 0.14894509315490723, "learning_rate": 7.072178866985334e-05, "loss": 0.037, "step": 19790 }, { "epoch": 14.275414563806777, "grad_norm": 0.16440579295158386, "learning_rate": 7.069168839114764e-05, "loss": 0.0337, "step": 19800 }, { "epoch": 14.282624369142033, "grad_norm": 0.19264082610607147, "learning_rate": 7.066157906119441e-05, "loss": 0.0319, "step": 19810 }, { "epoch": 14.289834174477289, "grad_norm": 0.09187889844179153, "learning_rate": 7.063146069316456e-05, "loss": 0.0339, "step": 19820 }, { "epoch": 14.297043979812544, "grad_norm": 0.13963213562965393, "learning_rate": 7.060133330023287e-05, "loss": 0.0341, "step": 19830 }, { "epoch": 14.3042537851478, "grad_norm": 0.17015740275382996, "learning_rate": 7.057119689557807e-05, "loss": 0.0408, "step": 19840 }, { "epoch": 14.311463590483058, "grad_norm": 0.15977241098880768, "learning_rate": 7.054105149238287e-05, "loss": 0.0361, "step": 19850 }, { "epoch": 14.318673395818314, "grad_norm": 0.11573483794927597, "learning_rate": 7.051089710383387e-05, "loss": 0.0425, "step": 19860 }, { "epoch": 14.32588320115357, "grad_norm": 0.1344679445028305, "learning_rate": 7.048073374312166e-05, "loss": 0.0377, "step": 19870 }, { "epoch": 14.333093006488825, "grad_norm": 0.16233138740062714, "learning_rate": 7.045056142344067e-05, "loss": 0.0352, "step": 19880 }, { "epoch": 14.340302811824081, "grad_norm": 0.10108377039432526, "learning_rate": 7.042038015798934e-05, "loss": 0.0309, "step": 19890 }, { "epoch": 14.347512617159337, "grad_norm": 0.1678275316953659, "learning_rate": 7.039018995996995e-05, "loss": 0.0365, "step": 19900 }, { "epoch": 14.354722422494593, "grad_norm": 0.10622391104698181, "learning_rate": 7.035999084258876e-05, "loss": 0.0326, "step": 19910 }, { "epoch": 14.361932227829849, "grad_norm": 0.1602836400270462, "learning_rate": 7.032978281905583e-05, "loss": 0.0323, "step": 19920 }, { "epoch": 14.369142033165105, "grad_norm": 0.20028439164161682, "learning_rate": 7.029956590258522e-05, "loss": 0.034, "step": 19930 }, { "epoch": 14.37635183850036, "grad_norm": 0.10976798087358475, "learning_rate": 7.026934010639483e-05, "loss": 0.0286, "step": 19940 }, { "epoch": 14.383561643835616, "grad_norm": 0.12340164184570312, "learning_rate": 7.023910544370645e-05, "loss": 0.0303, "step": 19950 }, { "epoch": 14.390771449170872, "grad_norm": 0.11456993967294693, "learning_rate": 7.020886192774573e-05, "loss": 0.0347, "step": 19960 }, { "epoch": 14.397981254506128, "grad_norm": 0.1387726217508316, "learning_rate": 7.017860957174222e-05, "loss": 0.0312, "step": 19970 }, { "epoch": 14.405191059841384, "grad_norm": 0.126731738448143, "learning_rate": 7.014834838892934e-05, "loss": 0.0315, "step": 19980 }, { "epoch": 14.41240086517664, "grad_norm": 0.1482362300157547, "learning_rate": 7.011807839254435e-05, "loss": 0.0301, "step": 19990 }, { "epoch": 14.419610670511895, "grad_norm": 0.0927891656756401, "learning_rate": 7.008779959582837e-05, "loss": 0.035, "step": 20000 }, { "epoch": 14.426820475847151, "grad_norm": 0.13397669792175293, "learning_rate": 7.005751201202639e-05, "loss": 0.032, "step": 20010 }, { "epoch": 14.434030281182409, "grad_norm": 0.15694040060043335, "learning_rate": 7.002721565438724e-05, "loss": 0.0321, "step": 20020 }, { "epoch": 14.441240086517665, "grad_norm": 0.17111527919769287, "learning_rate": 6.999691053616353e-05, "loss": 0.033, "step": 20030 }, { "epoch": 14.44844989185292, "grad_norm": 0.08825026452541351, "learning_rate": 6.996659667061178e-05, "loss": 0.0308, "step": 20040 }, { "epoch": 14.455659697188176, "grad_norm": 0.1276458352804184, "learning_rate": 6.993627407099229e-05, "loss": 0.0324, "step": 20050 }, { "epoch": 14.462869502523432, "grad_norm": 0.1275893747806549, "learning_rate": 6.990594275056921e-05, "loss": 0.0333, "step": 20060 }, { "epoch": 14.470079307858688, "grad_norm": 0.19489489495754242, "learning_rate": 6.987560272261049e-05, "loss": 0.036, "step": 20070 }, { "epoch": 14.477289113193944, "grad_norm": 0.17180617153644562, "learning_rate": 6.984525400038788e-05, "loss": 0.031, "step": 20080 }, { "epoch": 14.4844989185292, "grad_norm": 0.14048562943935394, "learning_rate": 6.981489659717693e-05, "loss": 0.0318, "step": 20090 }, { "epoch": 14.491708723864456, "grad_norm": 0.15637251734733582, "learning_rate": 6.978453052625703e-05, "loss": 0.0294, "step": 20100 }, { "epoch": 14.498918529199711, "grad_norm": 0.1087094098329544, "learning_rate": 6.975415580091131e-05, "loss": 0.0304, "step": 20110 }, { "epoch": 14.506128334534967, "grad_norm": 0.13347792625427246, "learning_rate": 6.972377243442672e-05, "loss": 0.0307, "step": 20120 }, { "epoch": 14.513338139870223, "grad_norm": 0.11055100709199905, "learning_rate": 6.969338044009396e-05, "loss": 0.0302, "step": 20130 }, { "epoch": 14.520547945205479, "grad_norm": 0.15675030648708344, "learning_rate": 6.966297983120753e-05, "loss": 0.033, "step": 20140 }, { "epoch": 14.527757750540735, "grad_norm": 0.13797345757484436, "learning_rate": 6.963257062106571e-05, "loss": 0.032, "step": 20150 }, { "epoch": 14.53496755587599, "grad_norm": 0.13540305197238922, "learning_rate": 6.960215282297048e-05, "loss": 0.0346, "step": 20160 }, { "epoch": 14.542177361211248, "grad_norm": 0.1273849606513977, "learning_rate": 6.957172645022765e-05, "loss": 0.0358, "step": 20170 }, { "epoch": 14.549387166546504, "grad_norm": 0.13454489409923553, "learning_rate": 6.954129151614673e-05, "loss": 0.0295, "step": 20180 }, { "epoch": 14.55659697188176, "grad_norm": 0.12321712076663971, "learning_rate": 6.9510848034041e-05, "loss": 0.0361, "step": 20190 }, { "epoch": 14.563806777217016, "grad_norm": 0.15523585677146912, "learning_rate": 6.94803960172275e-05, "loss": 0.0328, "step": 20200 }, { "epoch": 14.571016582552272, "grad_norm": 0.1195123940706253, "learning_rate": 6.944993547902692e-05, "loss": 0.0399, "step": 20210 }, { "epoch": 14.578226387887527, "grad_norm": 0.12163154035806656, "learning_rate": 6.941946643276379e-05, "loss": 0.0338, "step": 20220 }, { "epoch": 14.585436193222783, "grad_norm": 0.14448674023151398, "learning_rate": 6.938898889176626e-05, "loss": 0.0332, "step": 20230 }, { "epoch": 14.592645998558039, "grad_norm": 0.14934495091438293, "learning_rate": 6.935850286936627e-05, "loss": 0.0289, "step": 20240 }, { "epoch": 14.599855803893295, "grad_norm": 0.14194104075431824, "learning_rate": 6.932800837889943e-05, "loss": 0.0348, "step": 20250 }, { "epoch": 14.60706560922855, "grad_norm": 0.15861926972866058, "learning_rate": 6.92975054337051e-05, "loss": 0.0306, "step": 20260 }, { "epoch": 14.614275414563807, "grad_norm": 0.12171942740678787, "learning_rate": 6.926699404712627e-05, "loss": 0.032, "step": 20270 }, { "epoch": 14.621485219899062, "grad_norm": 0.1666516810655594, "learning_rate": 6.923647423250967e-05, "loss": 0.0395, "step": 20280 }, { "epoch": 14.628695025234318, "grad_norm": 0.16106434166431427, "learning_rate": 6.920594600320567e-05, "loss": 0.0328, "step": 20290 }, { "epoch": 14.635904830569574, "grad_norm": 0.13808707892894745, "learning_rate": 6.91754093725684e-05, "loss": 0.0311, "step": 20300 }, { "epoch": 14.64311463590483, "grad_norm": 0.17556551098823547, "learning_rate": 6.914486435395561e-05, "loss": 0.0371, "step": 20310 }, { "epoch": 14.650324441240086, "grad_norm": 0.1344643235206604, "learning_rate": 6.911431096072871e-05, "loss": 0.0311, "step": 20320 }, { "epoch": 14.657534246575342, "grad_norm": 0.15598838031291962, "learning_rate": 6.90837492062528e-05, "loss": 0.032, "step": 20330 }, { "epoch": 14.6647440519106, "grad_norm": 0.1439514011144638, "learning_rate": 6.905317910389664e-05, "loss": 0.0376, "step": 20340 }, { "epoch": 14.671953857245855, "grad_norm": 0.13344943523406982, "learning_rate": 6.902260066703262e-05, "loss": 0.0382, "step": 20350 }, { "epoch": 14.67916366258111, "grad_norm": 0.14250902831554413, "learning_rate": 6.89920139090368e-05, "loss": 0.0358, "step": 20360 }, { "epoch": 14.686373467916367, "grad_norm": 0.14050620794296265, "learning_rate": 6.896141884328886e-05, "loss": 0.0317, "step": 20370 }, { "epoch": 14.693583273251623, "grad_norm": 0.1311141550540924, "learning_rate": 6.893081548317211e-05, "loss": 0.0294, "step": 20380 }, { "epoch": 14.700793078586878, "grad_norm": 0.15115854144096375, "learning_rate": 6.890020384207353e-05, "loss": 0.0278, "step": 20390 }, { "epoch": 14.708002883922134, "grad_norm": 0.14126010239124298, "learning_rate": 6.886958393338366e-05, "loss": 0.0332, "step": 20400 }, { "epoch": 14.71521268925739, "grad_norm": 0.14771923422813416, "learning_rate": 6.883895577049668e-05, "loss": 0.0282, "step": 20410 }, { "epoch": 14.722422494592646, "grad_norm": 0.11810488998889923, "learning_rate": 6.880831936681045e-05, "loss": 0.0328, "step": 20420 }, { "epoch": 14.729632299927902, "grad_norm": 0.11397877335548401, "learning_rate": 6.87776747357263e-05, "loss": 0.0288, "step": 20430 }, { "epoch": 14.736842105263158, "grad_norm": 0.16271787881851196, "learning_rate": 6.874702189064927e-05, "loss": 0.0322, "step": 20440 }, { "epoch": 14.744051910598413, "grad_norm": 0.18723545968532562, "learning_rate": 6.871636084498796e-05, "loss": 0.0385, "step": 20450 }, { "epoch": 14.75126171593367, "grad_norm": 0.17923057079315186, "learning_rate": 6.868569161215453e-05, "loss": 0.0331, "step": 20460 }, { "epoch": 14.758471521268925, "grad_norm": 0.12650899589061737, "learning_rate": 6.865501420556477e-05, "loss": 0.0304, "step": 20470 }, { "epoch": 14.76568132660418, "grad_norm": 0.1421847641468048, "learning_rate": 6.862432863863802e-05, "loss": 0.0333, "step": 20480 }, { "epoch": 14.772891131939438, "grad_norm": 0.15828266739845276, "learning_rate": 6.859363492479716e-05, "loss": 0.0315, "step": 20490 }, { "epoch": 14.780100937274694, "grad_norm": 0.13974890112876892, "learning_rate": 6.856293307746868e-05, "loss": 0.0303, "step": 20500 }, { "epoch": 14.78731074260995, "grad_norm": 0.13490824401378632, "learning_rate": 6.853222311008263e-05, "loss": 0.0301, "step": 20510 }, { "epoch": 14.794520547945206, "grad_norm": 0.14387555420398712, "learning_rate": 6.850150503607256e-05, "loss": 0.0341, "step": 20520 }, { "epoch": 14.801730353280462, "grad_norm": 0.12356528639793396, "learning_rate": 6.847077886887562e-05, "loss": 0.0318, "step": 20530 }, { "epoch": 14.808940158615718, "grad_norm": 0.13040883839130402, "learning_rate": 6.844004462193248e-05, "loss": 0.0318, "step": 20540 }, { "epoch": 14.816149963950974, "grad_norm": 0.18795473873615265, "learning_rate": 6.840930230868734e-05, "loss": 0.036, "step": 20550 }, { "epoch": 14.82335976928623, "grad_norm": 0.1173051968216896, "learning_rate": 6.837855194258791e-05, "loss": 0.0304, "step": 20560 }, { "epoch": 14.830569574621485, "grad_norm": 0.15375857055187225, "learning_rate": 6.834779353708548e-05, "loss": 0.039, "step": 20570 }, { "epoch": 14.837779379956741, "grad_norm": 0.17544402182102203, "learning_rate": 6.831702710563478e-05, "loss": 0.0343, "step": 20580 }, { "epoch": 14.844989185291997, "grad_norm": 0.14650949835777283, "learning_rate": 6.828625266169416e-05, "loss": 0.0278, "step": 20590 }, { "epoch": 14.852198990627253, "grad_norm": 0.10535135865211487, "learning_rate": 6.825547021872534e-05, "loss": 0.0277, "step": 20600 }, { "epoch": 14.859408795962509, "grad_norm": 0.17070575058460236, "learning_rate": 6.822467979019365e-05, "loss": 0.037, "step": 20610 }, { "epoch": 14.866618601297764, "grad_norm": 0.11354958266019821, "learning_rate": 6.819388138956783e-05, "loss": 0.0367, "step": 20620 }, { "epoch": 14.87382840663302, "grad_norm": 0.11394532769918442, "learning_rate": 6.816307503032019e-05, "loss": 0.0316, "step": 20630 }, { "epoch": 14.881038211968276, "grad_norm": 0.09054534882307053, "learning_rate": 6.813226072592645e-05, "loss": 0.0293, "step": 20640 }, { "epoch": 14.888248017303532, "grad_norm": 0.11541539430618286, "learning_rate": 6.810143848986583e-05, "loss": 0.0292, "step": 20650 }, { "epoch": 14.89545782263879, "grad_norm": 0.15632149577140808, "learning_rate": 6.807060833562106e-05, "loss": 0.038, "step": 20660 }, { "epoch": 14.902667627974045, "grad_norm": 0.149275541305542, "learning_rate": 6.803977027667825e-05, "loss": 0.0328, "step": 20670 }, { "epoch": 14.909877433309301, "grad_norm": 0.14711026847362518, "learning_rate": 6.800892432652704e-05, "loss": 0.0325, "step": 20680 }, { "epoch": 14.917087238644557, "grad_norm": 0.13415110111236572, "learning_rate": 6.797807049866049e-05, "loss": 0.0306, "step": 20690 }, { "epoch": 14.924297043979813, "grad_norm": 0.12508752942085266, "learning_rate": 6.794720880657512e-05, "loss": 0.0325, "step": 20700 }, { "epoch": 14.931506849315069, "grad_norm": 0.14287616312503815, "learning_rate": 6.791633926377085e-05, "loss": 0.0296, "step": 20710 }, { "epoch": 14.938716654650325, "grad_norm": 0.15702715516090393, "learning_rate": 6.788546188375114e-05, "loss": 0.0337, "step": 20720 }, { "epoch": 14.94592645998558, "grad_norm": 0.1350395679473877, "learning_rate": 6.785457668002274e-05, "loss": 0.0322, "step": 20730 }, { "epoch": 14.953136265320836, "grad_norm": 0.143866166472435, "learning_rate": 6.78236836660959e-05, "loss": 0.0336, "step": 20740 }, { "epoch": 14.960346070656092, "grad_norm": 0.1287306398153305, "learning_rate": 6.779278285548427e-05, "loss": 0.0304, "step": 20750 }, { "epoch": 14.967555875991348, "grad_norm": 0.1340542882680893, "learning_rate": 6.776187426170493e-05, "loss": 0.0274, "step": 20760 }, { "epoch": 14.974765681326604, "grad_norm": 0.12601904571056366, "learning_rate": 6.773095789827837e-05, "loss": 0.0317, "step": 20770 }, { "epoch": 14.98197548666186, "grad_norm": 0.12585173547267914, "learning_rate": 6.770003377872841e-05, "loss": 0.0331, "step": 20780 }, { "epoch": 14.989185291997115, "grad_norm": 0.16125664114952087, "learning_rate": 6.766910191658233e-05, "loss": 0.0402, "step": 20790 }, { "epoch": 14.996395097332371, "grad_norm": 0.19183778762817383, "learning_rate": 6.763816232537079e-05, "loss": 0.0393, "step": 20800 }, { "epoch": 15.003604902667629, "grad_norm": 0.07582250237464905, "learning_rate": 6.76072150186278e-05, "loss": 0.0324, "step": 20810 }, { "epoch": 15.010814708002885, "grad_norm": 0.13209134340286255, "learning_rate": 6.757626000989077e-05, "loss": 0.0333, "step": 20820 }, { "epoch": 15.01802451333814, "grad_norm": 0.14854750037193298, "learning_rate": 6.754529731270048e-05, "loss": 0.0287, "step": 20830 }, { "epoch": 15.025234318673396, "grad_norm": 0.15123775601387024, "learning_rate": 6.751432694060105e-05, "loss": 0.0305, "step": 20840 }, { "epoch": 15.032444124008652, "grad_norm": 0.1496703326702118, "learning_rate": 6.748334890713999e-05, "loss": 0.0316, "step": 20850 }, { "epoch": 15.039653929343908, "grad_norm": 0.12781210243701935, "learning_rate": 6.745236322586813e-05, "loss": 0.0304, "step": 20860 }, { "epoch": 15.046863734679164, "grad_norm": 0.14251834154129028, "learning_rate": 6.742136991033967e-05, "loss": 0.0299, "step": 20870 }, { "epoch": 15.05407354001442, "grad_norm": 0.11010768264532089, "learning_rate": 6.739036897411215e-05, "loss": 0.0297, "step": 20880 }, { "epoch": 15.061283345349675, "grad_norm": 0.1392602175474167, "learning_rate": 6.73593604307464e-05, "loss": 0.0321, "step": 20890 }, { "epoch": 15.068493150684931, "grad_norm": 0.12064661085605621, "learning_rate": 6.732834429380663e-05, "loss": 0.0265, "step": 20900 }, { "epoch": 15.075702956020187, "grad_norm": 0.14913038909435272, "learning_rate": 6.729732057686036e-05, "loss": 0.0359, "step": 20910 }, { "epoch": 15.082912761355443, "grad_norm": 0.12555396556854248, "learning_rate": 6.726628929347839e-05, "loss": 0.0325, "step": 20920 }, { "epoch": 15.090122566690699, "grad_norm": 0.11749030649662018, "learning_rate": 6.723525045723488e-05, "loss": 0.0367, "step": 20930 }, { "epoch": 15.097332372025955, "grad_norm": 0.13397793471813202, "learning_rate": 6.720420408170728e-05, "loss": 0.0358, "step": 20940 }, { "epoch": 15.10454217736121, "grad_norm": 0.14019069075584412, "learning_rate": 6.71731501804763e-05, "loss": 0.0337, "step": 20950 }, { "epoch": 15.111751982696466, "grad_norm": 0.11863808333873749, "learning_rate": 6.714208876712602e-05, "loss": 0.0302, "step": 20960 }, { "epoch": 15.118961788031724, "grad_norm": 0.1375526785850525, "learning_rate": 6.71110198552437e-05, "loss": 0.0326, "step": 20970 }, { "epoch": 15.12617159336698, "grad_norm": 0.13773584365844727, "learning_rate": 6.707994345841996e-05, "loss": 0.0408, "step": 20980 }, { "epoch": 15.133381398702236, "grad_norm": 0.11609120666980743, "learning_rate": 6.70488595902487e-05, "loss": 0.0325, "step": 20990 }, { "epoch": 15.140591204037491, "grad_norm": 0.15199655294418335, "learning_rate": 6.7017768264327e-05, "loss": 0.0424, "step": 21000 }, { "epoch": 15.147801009372747, "grad_norm": 0.08541709929704666, "learning_rate": 6.698666949425532e-05, "loss": 0.0386, "step": 21010 }, { "epoch": 15.155010814708003, "grad_norm": 0.15585125982761383, "learning_rate": 6.69555632936373e-05, "loss": 0.0303, "step": 21020 }, { "epoch": 15.162220620043259, "grad_norm": 0.14907124638557434, "learning_rate": 6.692444967607983e-05, "loss": 0.0326, "step": 21030 }, { "epoch": 15.169430425378515, "grad_norm": 0.08434568345546722, "learning_rate": 6.689332865519309e-05, "loss": 0.0335, "step": 21040 }, { "epoch": 15.17664023071377, "grad_norm": 0.11807423084974289, "learning_rate": 6.686220024459045e-05, "loss": 0.0291, "step": 21050 }, { "epoch": 15.183850036049026, "grad_norm": 0.18003521859645844, "learning_rate": 6.683106445788856e-05, "loss": 0.0344, "step": 21060 }, { "epoch": 15.191059841384282, "grad_norm": 0.19711218774318695, "learning_rate": 6.679992130870723e-05, "loss": 0.0371, "step": 21070 }, { "epoch": 15.198269646719538, "grad_norm": 0.14410363137722015, "learning_rate": 6.676877081066957e-05, "loss": 0.0288, "step": 21080 }, { "epoch": 15.205479452054794, "grad_norm": 0.1179232969880104, "learning_rate": 6.673761297740186e-05, "loss": 0.0364, "step": 21090 }, { "epoch": 15.21268925739005, "grad_norm": 0.1272231936454773, "learning_rate": 6.670644782253358e-05, "loss": 0.0317, "step": 21100 }, { "epoch": 15.219899062725306, "grad_norm": 0.1544879674911499, "learning_rate": 6.667527535969744e-05, "loss": 0.037, "step": 21110 }, { "epoch": 15.227108868060562, "grad_norm": 0.10359148681163788, "learning_rate": 6.664409560252933e-05, "loss": 0.0283, "step": 21120 }, { "epoch": 15.23431867339582, "grad_norm": 0.13601113855838776, "learning_rate": 6.661290856466833e-05, "loss": 0.0352, "step": 21130 }, { "epoch": 15.241528478731075, "grad_norm": 0.11702542752027512, "learning_rate": 6.658171425975672e-05, "loss": 0.0287, "step": 21140 }, { "epoch": 15.24873828406633, "grad_norm": 0.1268012970685959, "learning_rate": 6.655051270143994e-05, "loss": 0.0322, "step": 21150 }, { "epoch": 15.255948089401587, "grad_norm": 0.11892174929380417, "learning_rate": 6.651930390336662e-05, "loss": 0.0334, "step": 21160 }, { "epoch": 15.263157894736842, "grad_norm": 0.1316690593957901, "learning_rate": 6.648808787918854e-05, "loss": 0.0295, "step": 21170 }, { "epoch": 15.270367700072098, "grad_norm": 0.1427183598279953, "learning_rate": 6.645686464256065e-05, "loss": 0.033, "step": 21180 }, { "epoch": 15.277577505407354, "grad_norm": 0.11107519268989563, "learning_rate": 6.642563420714106e-05, "loss": 0.0328, "step": 21190 }, { "epoch": 15.28478731074261, "grad_norm": 0.12546434998512268, "learning_rate": 6.639439658659102e-05, "loss": 0.032, "step": 21200 }, { "epoch": 15.291997116077866, "grad_norm": 0.1878345012664795, "learning_rate": 6.636315179457492e-05, "loss": 0.0309, "step": 21210 }, { "epoch": 15.299206921413122, "grad_norm": 0.15197882056236267, "learning_rate": 6.633189984476031e-05, "loss": 0.0283, "step": 21220 }, { "epoch": 15.306416726748377, "grad_norm": 0.16281045973300934, "learning_rate": 6.630064075081785e-05, "loss": 0.0323, "step": 21230 }, { "epoch": 15.313626532083633, "grad_norm": 0.1642773598432541, "learning_rate": 6.626937452642132e-05, "loss": 0.0353, "step": 21240 }, { "epoch": 15.32083633741889, "grad_norm": 0.1586584597826004, "learning_rate": 6.623810118524765e-05, "loss": 0.0306, "step": 21250 }, { "epoch": 15.328046142754145, "grad_norm": 0.1309327632188797, "learning_rate": 6.620682074097682e-05, "loss": 0.0315, "step": 21260 }, { "epoch": 15.3352559480894, "grad_norm": 0.11925529688596725, "learning_rate": 6.6175533207292e-05, "loss": 0.0396, "step": 21270 }, { "epoch": 15.342465753424657, "grad_norm": 0.2819162905216217, "learning_rate": 6.61442385978794e-05, "loss": 0.0351, "step": 21280 }, { "epoch": 15.349675558759914, "grad_norm": 0.2155168056488037, "learning_rate": 6.611293692642836e-05, "loss": 0.0353, "step": 21290 }, { "epoch": 15.35688536409517, "grad_norm": 0.12692664563655853, "learning_rate": 6.608162820663128e-05, "loss": 0.033, "step": 21300 }, { "epoch": 15.364095169430426, "grad_norm": 0.16611126065254211, "learning_rate": 6.605031245218365e-05, "loss": 0.0362, "step": 21310 }, { "epoch": 15.371304974765682, "grad_norm": 0.17662081122398376, "learning_rate": 6.601898967678408e-05, "loss": 0.0286, "step": 21320 }, { "epoch": 15.378514780100938, "grad_norm": 0.1672070026397705, "learning_rate": 6.598765989413419e-05, "loss": 0.0315, "step": 21330 }, { "epoch": 15.385724585436193, "grad_norm": 0.19859318435192108, "learning_rate": 6.595632311793867e-05, "loss": 0.0339, "step": 21340 }, { "epoch": 15.39293439077145, "grad_norm": 0.13139835000038147, "learning_rate": 6.592497936190534e-05, "loss": 0.0284, "step": 21350 }, { "epoch": 15.400144196106705, "grad_norm": 0.14144665002822876, "learning_rate": 6.5893628639745e-05, "loss": 0.0339, "step": 21360 }, { "epoch": 15.407354001441961, "grad_norm": 0.11958932876586914, "learning_rate": 6.586227096517151e-05, "loss": 0.0377, "step": 21370 }, { "epoch": 15.414563806777217, "grad_norm": 0.10755115747451782, "learning_rate": 6.58309063519018e-05, "loss": 0.0281, "step": 21380 }, { "epoch": 15.421773612112473, "grad_norm": 0.13765709102153778, "learning_rate": 6.579953481365582e-05, "loss": 0.032, "step": 21390 }, { "epoch": 15.428983417447728, "grad_norm": 0.17536327242851257, "learning_rate": 6.576815636415652e-05, "loss": 0.0311, "step": 21400 }, { "epoch": 15.436193222782984, "grad_norm": 0.1144598200917244, "learning_rate": 6.573677101712991e-05, "loss": 0.0327, "step": 21410 }, { "epoch": 15.44340302811824, "grad_norm": 0.14475207030773163, "learning_rate": 6.570537878630502e-05, "loss": 0.0309, "step": 21420 }, { "epoch": 15.450612833453496, "grad_norm": 0.11612673848867416, "learning_rate": 6.56739796854139e-05, "loss": 0.0309, "step": 21430 }, { "epoch": 15.457822638788752, "grad_norm": 0.13178101181983948, "learning_rate": 6.564257372819153e-05, "loss": 0.0363, "step": 21440 }, { "epoch": 15.46503244412401, "grad_norm": 0.1247469112277031, "learning_rate": 6.561116092837596e-05, "loss": 0.033, "step": 21450 }, { "epoch": 15.472242249459265, "grad_norm": 0.10152879357337952, "learning_rate": 6.557974129970825e-05, "loss": 0.0348, "step": 21460 }, { "epoch": 15.479452054794521, "grad_norm": 0.13401487469673157, "learning_rate": 6.554831485593239e-05, "loss": 0.0293, "step": 21470 }, { "epoch": 15.486661860129777, "grad_norm": 0.23245534300804138, "learning_rate": 6.551688161079534e-05, "loss": 0.0341, "step": 21480 }, { "epoch": 15.493871665465033, "grad_norm": 0.12830474972724915, "learning_rate": 6.548544157804712e-05, "loss": 0.0329, "step": 21490 }, { "epoch": 15.501081470800289, "grad_norm": 0.14556798338890076, "learning_rate": 6.545399477144065e-05, "loss": 0.0324, "step": 21500 }, { "epoch": 15.508291276135544, "grad_norm": 0.14311207830905914, "learning_rate": 6.542254120473184e-05, "loss": 0.0337, "step": 21510 }, { "epoch": 15.5155010814708, "grad_norm": 0.14261497557163239, "learning_rate": 6.539108089167953e-05, "loss": 0.0289, "step": 21520 }, { "epoch": 15.522710886806056, "grad_norm": 0.1729521006345749, "learning_rate": 6.535961384604554e-05, "loss": 0.0345, "step": 21530 }, { "epoch": 15.529920692141312, "grad_norm": 0.08526650071144104, "learning_rate": 6.532814008159461e-05, "loss": 0.0286, "step": 21540 }, { "epoch": 15.537130497476568, "grad_norm": 0.1068815067410469, "learning_rate": 6.529665961209446e-05, "loss": 0.0343, "step": 21550 }, { "epoch": 15.544340302811824, "grad_norm": 0.15688951313495636, "learning_rate": 6.52651724513157e-05, "loss": 0.0272, "step": 21560 }, { "epoch": 15.55155010814708, "grad_norm": 0.13300484418869019, "learning_rate": 6.523367861303189e-05, "loss": 0.0302, "step": 21570 }, { "epoch": 15.558759913482335, "grad_norm": 0.15478435158729553, "learning_rate": 6.520217811101951e-05, "loss": 0.0297, "step": 21580 }, { "epoch": 15.565969718817591, "grad_norm": 0.16268479824066162, "learning_rate": 6.517067095905793e-05, "loss": 0.0328, "step": 21590 }, { "epoch": 15.573179524152849, "grad_norm": 0.1643877476453781, "learning_rate": 6.513915717092948e-05, "loss": 0.0371, "step": 21600 }, { "epoch": 15.580389329488105, "grad_norm": 0.15494774281978607, "learning_rate": 6.510763676041935e-05, "loss": 0.0291, "step": 21610 }, { "epoch": 15.58759913482336, "grad_norm": 0.15013834834098816, "learning_rate": 6.50761097413156e-05, "loss": 0.0346, "step": 21620 }, { "epoch": 15.594808940158616, "grad_norm": 0.1187107115983963, "learning_rate": 6.504457612740928e-05, "loss": 0.0352, "step": 21630 }, { "epoch": 15.602018745493872, "grad_norm": 0.12537313997745514, "learning_rate": 6.501303593249423e-05, "loss": 0.0347, "step": 21640 }, { "epoch": 15.609228550829128, "grad_norm": 0.14808225631713867, "learning_rate": 6.49814891703672e-05, "loss": 0.0295, "step": 21650 }, { "epoch": 15.616438356164384, "grad_norm": 0.11576258391141891, "learning_rate": 6.494993585482783e-05, "loss": 0.0302, "step": 21660 }, { "epoch": 15.62364816149964, "grad_norm": 0.1902793049812317, "learning_rate": 6.49183759996786e-05, "loss": 0.0273, "step": 21670 }, { "epoch": 15.630857966834895, "grad_norm": 0.19210590422153473, "learning_rate": 6.488680961872486e-05, "loss": 0.0296, "step": 21680 }, { "epoch": 15.638067772170151, "grad_norm": 0.16173380613327026, "learning_rate": 6.485523672577485e-05, "loss": 0.029, "step": 21690 }, { "epoch": 15.645277577505407, "grad_norm": 0.1096276044845581, "learning_rate": 6.482365733463959e-05, "loss": 0.0366, "step": 21700 }, { "epoch": 15.652487382840663, "grad_norm": 0.12183451652526855, "learning_rate": 6.4792071459133e-05, "loss": 0.0358, "step": 21710 }, { "epoch": 15.659697188175919, "grad_norm": 0.11524564772844315, "learning_rate": 6.476047911307179e-05, "loss": 0.0276, "step": 21720 }, { "epoch": 15.666906993511175, "grad_norm": 0.1872175633907318, "learning_rate": 6.472888031027556e-05, "loss": 0.0342, "step": 21730 }, { "epoch": 15.67411679884643, "grad_norm": 0.16151070594787598, "learning_rate": 6.469727506456666e-05, "loss": 0.0345, "step": 21740 }, { "epoch": 15.681326604181686, "grad_norm": 0.1039075255393982, "learning_rate": 6.466566338977034e-05, "loss": 0.0306, "step": 21750 }, { "epoch": 15.688536409516942, "grad_norm": 0.136116161942482, "learning_rate": 6.46340452997146e-05, "loss": 0.0289, "step": 21760 }, { "epoch": 15.6957462148522, "grad_norm": 0.124783955514431, "learning_rate": 6.460242080823025e-05, "loss": 0.0344, "step": 21770 }, { "epoch": 15.702956020187456, "grad_norm": 0.10253258794546127, "learning_rate": 6.457078992915093e-05, "loss": 0.0301, "step": 21780 }, { "epoch": 15.710165825522711, "grad_norm": 0.1556670218706131, "learning_rate": 6.453915267631308e-05, "loss": 0.0324, "step": 21790 }, { "epoch": 15.717375630857967, "grad_norm": 0.09745895117521286, "learning_rate": 6.450750906355589e-05, "loss": 0.0334, "step": 21800 }, { "epoch": 15.724585436193223, "grad_norm": 0.12886327505111694, "learning_rate": 6.447585910472134e-05, "loss": 0.033, "step": 21810 }, { "epoch": 15.731795241528479, "grad_norm": 0.13585107028484344, "learning_rate": 6.444420281365424e-05, "loss": 0.0385, "step": 21820 }, { "epoch": 15.739005046863735, "grad_norm": 0.12242895364761353, "learning_rate": 6.441254020420208e-05, "loss": 0.0296, "step": 21830 }, { "epoch": 15.74621485219899, "grad_norm": 0.1507023125886917, "learning_rate": 6.438087129021522e-05, "loss": 0.0376, "step": 21840 }, { "epoch": 15.753424657534246, "grad_norm": 0.16737402975559235, "learning_rate": 6.434919608554663e-05, "loss": 0.0289, "step": 21850 }, { "epoch": 15.760634462869502, "grad_norm": 0.11399668455123901, "learning_rate": 6.43175146040522e-05, "loss": 0.0356, "step": 21860 }, { "epoch": 15.767844268204758, "grad_norm": 0.14231480658054352, "learning_rate": 6.428582685959045e-05, "loss": 0.0313, "step": 21870 }, { "epoch": 15.775054073540014, "grad_norm": 0.14579223096370697, "learning_rate": 6.42541328660227e-05, "loss": 0.0328, "step": 21880 }, { "epoch": 15.78226387887527, "grad_norm": 0.10244758427143097, "learning_rate": 6.422243263721293e-05, "loss": 0.0317, "step": 21890 }, { "epoch": 15.789473684210526, "grad_norm": 0.09088342636823654, "learning_rate": 6.419072618702794e-05, "loss": 0.032, "step": 21900 }, { "epoch": 15.796683489545781, "grad_norm": 0.14940698444843292, "learning_rate": 6.41590135293372e-05, "loss": 0.0326, "step": 21910 }, { "epoch": 15.803893294881039, "grad_norm": 0.15001599490642548, "learning_rate": 6.41272946780129e-05, "loss": 0.0368, "step": 21920 }, { "epoch": 15.811103100216295, "grad_norm": 0.1252783089876175, "learning_rate": 6.409556964692996e-05, "loss": 0.0284, "step": 21930 }, { "epoch": 15.81831290555155, "grad_norm": 0.15277692675590515, "learning_rate": 6.406383844996595e-05, "loss": 0.0375, "step": 21940 }, { "epoch": 15.825522710886807, "grad_norm": 0.22538016736507416, "learning_rate": 6.403210110100121e-05, "loss": 0.0386, "step": 21950 }, { "epoch": 15.832732516222062, "grad_norm": 0.1331184208393097, "learning_rate": 6.400035761391871e-05, "loss": 0.0301, "step": 21960 }, { "epoch": 15.839942321557318, "grad_norm": 0.13791373372077942, "learning_rate": 6.396860800260416e-05, "loss": 0.0314, "step": 21970 }, { "epoch": 15.847152126892574, "grad_norm": 0.16757193207740784, "learning_rate": 6.393685228094588e-05, "loss": 0.0296, "step": 21980 }, { "epoch": 15.85436193222783, "grad_norm": 0.13653545081615448, "learning_rate": 6.390509046283493e-05, "loss": 0.0439, "step": 21990 }, { "epoch": 15.861571737563086, "grad_norm": 0.10087460279464722, "learning_rate": 6.387332256216498e-05, "loss": 0.0287, "step": 22000 }, { "epoch": 15.868781542898342, "grad_norm": 0.11387955397367477, "learning_rate": 6.384154859283243e-05, "loss": 0.031, "step": 22010 }, { "epoch": 15.875991348233597, "grad_norm": 0.12766146659851074, "learning_rate": 6.380976856873627e-05, "loss": 0.0325, "step": 22020 }, { "epoch": 15.883201153568853, "grad_norm": 0.23169414699077606, "learning_rate": 6.377798250377815e-05, "loss": 0.0349, "step": 22030 }, { "epoch": 15.89041095890411, "grad_norm": 0.13793760538101196, "learning_rate": 6.374619041186238e-05, "loss": 0.0336, "step": 22040 }, { "epoch": 15.897620764239365, "grad_norm": 0.12893925607204437, "learning_rate": 6.371439230689592e-05, "loss": 0.0278, "step": 22050 }, { "epoch": 15.90483056957462, "grad_norm": 0.1303417980670929, "learning_rate": 6.368258820278833e-05, "loss": 0.0318, "step": 22060 }, { "epoch": 15.912040374909877, "grad_norm": 0.11236979812383652, "learning_rate": 6.365077811345182e-05, "loss": 0.0296, "step": 22070 }, { "epoch": 15.919250180245132, "grad_norm": 0.15548551082611084, "learning_rate": 6.361896205280117e-05, "loss": 0.0295, "step": 22080 }, { "epoch": 15.92645998558039, "grad_norm": 0.16719697415828705, "learning_rate": 6.358714003475384e-05, "loss": 0.0303, "step": 22090 }, { "epoch": 15.933669790915646, "grad_norm": 0.12622271478176117, "learning_rate": 6.355531207322983e-05, "loss": 0.0355, "step": 22100 }, { "epoch": 15.940879596250902, "grad_norm": 0.11095235496759415, "learning_rate": 6.35234781821518e-05, "loss": 0.0305, "step": 22110 }, { "epoch": 15.948089401586158, "grad_norm": 0.14988069236278534, "learning_rate": 6.349163837544497e-05, "loss": 0.037, "step": 22120 }, { "epoch": 15.955299206921413, "grad_norm": 0.12174361199140549, "learning_rate": 6.345979266703714e-05, "loss": 0.0305, "step": 22130 }, { "epoch": 15.96250901225667, "grad_norm": 0.14512518048286438, "learning_rate": 6.342794107085872e-05, "loss": 0.0291, "step": 22140 }, { "epoch": 15.969718817591925, "grad_norm": 0.1370907425880432, "learning_rate": 6.339608360084265e-05, "loss": 0.0358, "step": 22150 }, { "epoch": 15.976928622927181, "grad_norm": 0.1554478257894516, "learning_rate": 6.336422027092452e-05, "loss": 0.0319, "step": 22160 }, { "epoch": 15.984138428262437, "grad_norm": 0.11135166883468628, "learning_rate": 6.333235109504241e-05, "loss": 0.0318, "step": 22170 }, { "epoch": 15.991348233597693, "grad_norm": 0.15392456948757172, "learning_rate": 6.330047608713695e-05, "loss": 0.0288, "step": 22180 }, { "epoch": 15.998558038932948, "grad_norm": 0.1590588539838791, "learning_rate": 6.32685952611514e-05, "loss": 0.0324, "step": 22190 }, { "epoch": 16.005767844268206, "grad_norm": 0.11030558496713638, "learning_rate": 6.323670863103149e-05, "loss": 0.0271, "step": 22200 }, { "epoch": 16.01297764960346, "grad_norm": 0.0967399924993515, "learning_rate": 6.32048162107255e-05, "loss": 0.0318, "step": 22210 }, { "epoch": 16.020187454938718, "grad_norm": 0.09354943037033081, "learning_rate": 6.317291801418431e-05, "loss": 0.026, "step": 22220 }, { "epoch": 16.027397260273972, "grad_norm": 0.19917483627796173, "learning_rate": 6.31410140553612e-05, "loss": 0.0361, "step": 22230 }, { "epoch": 16.03460706560923, "grad_norm": 0.13418281078338623, "learning_rate": 6.310910434821211e-05, "loss": 0.0332, "step": 22240 }, { "epoch": 16.041816870944483, "grad_norm": 0.13579756021499634, "learning_rate": 6.307718890669541e-05, "loss": 0.0338, "step": 22250 }, { "epoch": 16.04902667627974, "grad_norm": 0.1802493929862976, "learning_rate": 6.304526774477198e-05, "loss": 0.0358, "step": 22260 }, { "epoch": 16.056236481614995, "grad_norm": 0.1365540474653244, "learning_rate": 6.301334087640526e-05, "loss": 0.038, "step": 22270 }, { "epoch": 16.063446286950253, "grad_norm": 0.14464621245861053, "learning_rate": 6.298140831556112e-05, "loss": 0.0291, "step": 22280 }, { "epoch": 16.070656092285507, "grad_norm": 0.1435399353504181, "learning_rate": 6.294947007620794e-05, "loss": 0.0358, "step": 22290 }, { "epoch": 16.077865897620764, "grad_norm": 0.1688268780708313, "learning_rate": 6.291752617231661e-05, "loss": 0.0303, "step": 22300 }, { "epoch": 16.08507570295602, "grad_norm": 0.16458424925804138, "learning_rate": 6.288557661786047e-05, "loss": 0.0295, "step": 22310 }, { "epoch": 16.092285508291276, "grad_norm": 0.1448703110218048, "learning_rate": 6.285362142681534e-05, "loss": 0.0292, "step": 22320 }, { "epoch": 16.099495313626534, "grad_norm": 0.20889131724834442, "learning_rate": 6.28216606131595e-05, "loss": 0.0352, "step": 22330 }, { "epoch": 16.106705118961788, "grad_norm": 0.14681951701641083, "learning_rate": 6.27896941908737e-05, "loss": 0.0361, "step": 22340 }, { "epoch": 16.113914924297045, "grad_norm": 0.1515766829252243, "learning_rate": 6.275772217394115e-05, "loss": 0.0351, "step": 22350 }, { "epoch": 16.1211247296323, "grad_norm": 0.07823029160499573, "learning_rate": 6.27257445763475e-05, "loss": 0.029, "step": 22360 }, { "epoch": 16.128334534967557, "grad_norm": 0.13996808230876923, "learning_rate": 6.269376141208082e-05, "loss": 0.0283, "step": 22370 }, { "epoch": 16.13554434030281, "grad_norm": 0.15687361359596252, "learning_rate": 6.266177269513162e-05, "loss": 0.0356, "step": 22380 }, { "epoch": 16.14275414563807, "grad_norm": 0.09274108707904816, "learning_rate": 6.262977843949289e-05, "loss": 0.0337, "step": 22390 }, { "epoch": 16.149963950973323, "grad_norm": 0.14968180656433105, "learning_rate": 6.259777865915997e-05, "loss": 0.029, "step": 22400 }, { "epoch": 16.15717375630858, "grad_norm": 0.12749062478542328, "learning_rate": 6.256577336813069e-05, "loss": 0.0339, "step": 22410 }, { "epoch": 16.164383561643834, "grad_norm": 0.11296242475509644, "learning_rate": 6.253376258040521e-05, "loss": 0.0339, "step": 22420 }, { "epoch": 16.171593366979092, "grad_norm": 0.09441390633583069, "learning_rate": 6.250174630998616e-05, "loss": 0.03, "step": 22430 }, { "epoch": 16.178803172314346, "grad_norm": 0.13798755407333374, "learning_rate": 6.246972457087854e-05, "loss": 0.0308, "step": 22440 }, { "epoch": 16.186012977649604, "grad_norm": 0.14176641404628754, "learning_rate": 6.243769737708974e-05, "loss": 0.0283, "step": 22450 }, { "epoch": 16.193222782984858, "grad_norm": 0.14061300456523895, "learning_rate": 6.240566474262955e-05, "loss": 0.0389, "step": 22460 }, { "epoch": 16.200432588320115, "grad_norm": 0.1847897619009018, "learning_rate": 6.237362668151012e-05, "loss": 0.0338, "step": 22470 }, { "epoch": 16.20764239365537, "grad_norm": 0.12456739693880081, "learning_rate": 6.2341583207746e-05, "loss": 0.0295, "step": 22480 }, { "epoch": 16.214852198990627, "grad_norm": 0.1373836100101471, "learning_rate": 6.23095343353541e-05, "loss": 0.03, "step": 22490 }, { "epoch": 16.222062004325885, "grad_norm": 0.1734667718410492, "learning_rate": 6.227748007835366e-05, "loss": 0.0361, "step": 22500 }, { "epoch": 16.22927180966114, "grad_norm": 0.15373210608959198, "learning_rate": 6.224542045076632e-05, "loss": 0.0305, "step": 22510 }, { "epoch": 16.236481614996396, "grad_norm": 0.15439410507678986, "learning_rate": 6.221335546661606e-05, "loss": 0.0326, "step": 22520 }, { "epoch": 16.24369142033165, "grad_norm": 0.12315978109836578, "learning_rate": 6.218128513992917e-05, "loss": 0.0299, "step": 22530 }, { "epoch": 16.250901225666908, "grad_norm": 0.15907970070838928, "learning_rate": 6.214920948473433e-05, "loss": 0.0352, "step": 22540 }, { "epoch": 16.258111031002162, "grad_norm": 0.139762282371521, "learning_rate": 6.211712851506249e-05, "loss": 0.0285, "step": 22550 }, { "epoch": 16.26532083633742, "grad_norm": 0.1270601600408554, "learning_rate": 6.208504224494696e-05, "loss": 0.0314, "step": 22560 }, { "epoch": 16.272530641672674, "grad_norm": 0.1633496731519699, "learning_rate": 6.205295068842338e-05, "loss": 0.0282, "step": 22570 }, { "epoch": 16.27974044700793, "grad_norm": 0.14494028687477112, "learning_rate": 6.202085385952968e-05, "loss": 0.0316, "step": 22580 }, { "epoch": 16.286950252343185, "grad_norm": 0.14410248398780823, "learning_rate": 6.19887517723061e-05, "loss": 0.0292, "step": 22590 }, { "epoch": 16.294160057678443, "grad_norm": 0.11999019235372543, "learning_rate": 6.19566444407952e-05, "loss": 0.0262, "step": 22600 }, { "epoch": 16.301369863013697, "grad_norm": 0.10016100108623505, "learning_rate": 6.192453187904178e-05, "loss": 0.0313, "step": 22610 }, { "epoch": 16.308579668348955, "grad_norm": 0.143216073513031, "learning_rate": 6.189241410109303e-05, "loss": 0.0268, "step": 22620 }, { "epoch": 16.31578947368421, "grad_norm": 0.17127442359924316, "learning_rate": 6.186029112099829e-05, "loss": 0.0337, "step": 22630 }, { "epoch": 16.322999279019466, "grad_norm": 0.11385399848222733, "learning_rate": 6.182816295280926e-05, "loss": 0.0277, "step": 22640 }, { "epoch": 16.330209084354724, "grad_norm": 0.10591663420200348, "learning_rate": 6.179602961057993e-05, "loss": 0.0338, "step": 22650 }, { "epoch": 16.337418889689978, "grad_norm": 0.17703001201152802, "learning_rate": 6.176389110836647e-05, "loss": 0.0295, "step": 22660 }, { "epoch": 16.344628695025236, "grad_norm": 0.15194037556648254, "learning_rate": 6.173174746022738e-05, "loss": 0.0343, "step": 22670 }, { "epoch": 16.35183850036049, "grad_norm": 0.18826329708099365, "learning_rate": 6.169959868022338e-05, "loss": 0.0328, "step": 22680 }, { "epoch": 16.359048305695747, "grad_norm": 0.15673643350601196, "learning_rate": 6.166744478241742e-05, "loss": 0.0341, "step": 22690 }, { "epoch": 16.366258111031, "grad_norm": 0.16746771335601807, "learning_rate": 6.163528578087473e-05, "loss": 0.034, "step": 22700 }, { "epoch": 16.37346791636626, "grad_norm": 0.14368654787540436, "learning_rate": 6.160312168966274e-05, "loss": 0.0359, "step": 22710 }, { "epoch": 16.380677721701513, "grad_norm": 0.16237102448940277, "learning_rate": 6.157095252285112e-05, "loss": 0.0326, "step": 22720 }, { "epoch": 16.38788752703677, "grad_norm": 0.13145293295383453, "learning_rate": 6.153877829451174e-05, "loss": 0.036, "step": 22730 }, { "epoch": 16.395097332372025, "grad_norm": 0.10503803193569183, "learning_rate": 6.150659901871872e-05, "loss": 0.0346, "step": 22740 }, { "epoch": 16.402307137707282, "grad_norm": 0.12587614357471466, "learning_rate": 6.147441470954836e-05, "loss": 0.0342, "step": 22750 }, { "epoch": 16.409516943042536, "grad_norm": 0.1112155094742775, "learning_rate": 6.144222538107918e-05, "loss": 0.0308, "step": 22760 }, { "epoch": 16.416726748377794, "grad_norm": 0.18914692103862762, "learning_rate": 6.141003104739188e-05, "loss": 0.031, "step": 22770 }, { "epoch": 16.423936553713048, "grad_norm": 0.13665683567523956, "learning_rate": 6.137783172256935e-05, "loss": 0.0347, "step": 22780 }, { "epoch": 16.431146359048306, "grad_norm": 0.15970496833324432, "learning_rate": 6.134562742069669e-05, "loss": 0.0321, "step": 22790 }, { "epoch": 16.438356164383563, "grad_norm": 0.14846158027648926, "learning_rate": 6.131341815586111e-05, "loss": 0.0355, "step": 22800 }, { "epoch": 16.445565969718817, "grad_norm": 0.15028676390647888, "learning_rate": 6.128120394215209e-05, "loss": 0.0322, "step": 22810 }, { "epoch": 16.452775775054075, "grad_norm": 0.17687050998210907, "learning_rate": 6.124898479366116e-05, "loss": 0.0331, "step": 22820 }, { "epoch": 16.45998558038933, "grad_norm": 0.1595849096775055, "learning_rate": 6.121676072448214e-05, "loss": 0.0319, "step": 22830 }, { "epoch": 16.467195385724587, "grad_norm": 0.1360291689634323, "learning_rate": 6.118453174871089e-05, "loss": 0.0304, "step": 22840 }, { "epoch": 16.47440519105984, "grad_norm": 0.21677042543888092, "learning_rate": 6.115229788044548e-05, "loss": 0.0322, "step": 22850 }, { "epoch": 16.4816149963951, "grad_norm": 0.11288411170244217, "learning_rate": 6.112005913378607e-05, "loss": 0.0334, "step": 22860 }, { "epoch": 16.488824801730352, "grad_norm": 0.11553484201431274, "learning_rate": 6.1087815522835e-05, "loss": 0.0337, "step": 22870 }, { "epoch": 16.49603460706561, "grad_norm": 0.09150592237710953, "learning_rate": 6.105556706169673e-05, "loss": 0.0299, "step": 22880 }, { "epoch": 16.503244412400864, "grad_norm": 0.13956180214881897, "learning_rate": 6.102331376447782e-05, "loss": 0.0335, "step": 22890 }, { "epoch": 16.51045421773612, "grad_norm": 0.1650897115468979, "learning_rate": 6.099105564528697e-05, "loss": 0.036, "step": 22900 }, { "epoch": 16.517664023071376, "grad_norm": 0.1252184510231018, "learning_rate": 6.095879271823495e-05, "loss": 0.0286, "step": 22910 }, { "epoch": 16.524873828406633, "grad_norm": 0.16581183671951294, "learning_rate": 6.09265249974347e-05, "loss": 0.0314, "step": 22920 }, { "epoch": 16.532083633741887, "grad_norm": 0.1373465657234192, "learning_rate": 6.089425249700118e-05, "loss": 0.0353, "step": 22930 }, { "epoch": 16.539293439077145, "grad_norm": 0.11714773625135422, "learning_rate": 6.0861975231051505e-05, "loss": 0.0344, "step": 22940 }, { "epoch": 16.5465032444124, "grad_norm": 0.11008637398481369, "learning_rate": 6.082969321370484e-05, "loss": 0.0273, "step": 22950 }, { "epoch": 16.553713049747657, "grad_norm": 0.16458362340927124, "learning_rate": 6.079740645908243e-05, "loss": 0.029, "step": 22960 }, { "epoch": 16.560922855082914, "grad_norm": 0.12568028271198273, "learning_rate": 6.07651149813076e-05, "loss": 0.0321, "step": 22970 }, { "epoch": 16.56813266041817, "grad_norm": 0.12237198650836945, "learning_rate": 6.073281879450574e-05, "loss": 0.031, "step": 22980 }, { "epoch": 16.575342465753426, "grad_norm": 0.1756526529788971, "learning_rate": 6.070051791280431e-05, "loss": 0.0362, "step": 22990 }, { "epoch": 16.58255227108868, "grad_norm": 0.2578897476196289, "learning_rate": 6.066821235033281e-05, "loss": 0.0326, "step": 23000 }, { "epoch": 16.589762076423938, "grad_norm": 0.11972954124212265, "learning_rate": 6.063590212122277e-05, "loss": 0.0281, "step": 23010 }, { "epoch": 16.596971881759192, "grad_norm": 0.20492157340049744, "learning_rate": 6.0603587239607794e-05, "loss": 0.0346, "step": 23020 }, { "epoch": 16.60418168709445, "grad_norm": 0.1337532252073288, "learning_rate": 6.057126771962353e-05, "loss": 0.0305, "step": 23030 }, { "epoch": 16.611391492429703, "grad_norm": 0.11305487900972366, "learning_rate": 6.05389435754076e-05, "loss": 0.0315, "step": 23040 }, { "epoch": 16.61860129776496, "grad_norm": 0.11340704560279846, "learning_rate": 6.0506614821099705e-05, "loss": 0.0296, "step": 23050 }, { "epoch": 16.625811103100215, "grad_norm": 0.1842193752527237, "learning_rate": 6.047428147084153e-05, "loss": 0.0385, "step": 23060 }, { "epoch": 16.633020908435473, "grad_norm": 0.10930667072534561, "learning_rate": 6.044194353877679e-05, "loss": 0.0296, "step": 23070 }, { "epoch": 16.640230713770727, "grad_norm": 0.13771843910217285, "learning_rate": 6.04096010390512e-05, "loss": 0.0316, "step": 23080 }, { "epoch": 16.647440519105984, "grad_norm": 0.12410448491573334, "learning_rate": 6.0377253985812445e-05, "loss": 0.0368, "step": 23090 }, { "epoch": 16.65465032444124, "grad_norm": 0.15676289796829224, "learning_rate": 6.0344902393210235e-05, "loss": 0.0293, "step": 23100 }, { "epoch": 16.661860129776496, "grad_norm": 0.17267028987407684, "learning_rate": 6.031254627539625e-05, "loss": 0.0389, "step": 23110 }, { "epoch": 16.66906993511175, "grad_norm": 0.1631309539079666, "learning_rate": 6.028018564652418e-05, "loss": 0.0322, "step": 23120 }, { "epoch": 16.676279740447008, "grad_norm": 0.1592189222574234, "learning_rate": 6.024782052074964e-05, "loss": 0.0309, "step": 23130 }, { "epoch": 16.683489545782265, "grad_norm": 0.1748698651790619, "learning_rate": 6.021545091223026e-05, "loss": 0.0306, "step": 23140 }, { "epoch": 16.69069935111752, "grad_norm": 0.12771692872047424, "learning_rate": 6.0183076835125586e-05, "loss": 0.0292, "step": 23150 }, { "epoch": 16.697909156452777, "grad_norm": 0.13774500787258148, "learning_rate": 6.015069830359714e-05, "loss": 0.0331, "step": 23160 }, { "epoch": 16.70511896178803, "grad_norm": 0.1397700160741806, "learning_rate": 6.0118315331808384e-05, "loss": 0.0352, "step": 23170 }, { "epoch": 16.71232876712329, "grad_norm": 0.1734742373228073, "learning_rate": 6.0085927933924745e-05, "loss": 0.0301, "step": 23180 }, { "epoch": 16.719538572458543, "grad_norm": 0.11165005713701248, "learning_rate": 6.0053536124113585e-05, "loss": 0.031, "step": 23190 }, { "epoch": 16.7267483777938, "grad_norm": 0.12205390632152557, "learning_rate": 6.0021139916544144e-05, "loss": 0.0292, "step": 23200 }, { "epoch": 16.733958183129054, "grad_norm": 0.13712985813617706, "learning_rate": 5.998873932538764e-05, "loss": 0.0317, "step": 23210 }, { "epoch": 16.741167988464312, "grad_norm": 0.1371031403541565, "learning_rate": 5.995633436481721e-05, "loss": 0.0442, "step": 23220 }, { "epoch": 16.748377793799566, "grad_norm": 0.10183548182249069, "learning_rate": 5.9923925049007855e-05, "loss": 0.0334, "step": 23230 }, { "epoch": 16.755587599134824, "grad_norm": 0.13848967850208282, "learning_rate": 5.989151139213656e-05, "loss": 0.0391, "step": 23240 }, { "epoch": 16.762797404470078, "grad_norm": 0.1180538684129715, "learning_rate": 5.9859093408382094e-05, "loss": 0.033, "step": 23250 }, { "epoch": 16.770007209805335, "grad_norm": 0.18018700182437897, "learning_rate": 5.982667111192523e-05, "loss": 0.0406, "step": 23260 }, { "epoch": 16.77721701514059, "grad_norm": 0.14627835154533386, "learning_rate": 5.979424451694857e-05, "loss": 0.0341, "step": 23270 }, { "epoch": 16.784426820475847, "grad_norm": 0.13262516260147095, "learning_rate": 5.97618136376366e-05, "loss": 0.0322, "step": 23280 }, { "epoch": 16.791636625811105, "grad_norm": 0.1494065821170807, "learning_rate": 5.97293784881757e-05, "loss": 0.0342, "step": 23290 }, { "epoch": 16.79884643114636, "grad_norm": 0.11136701703071594, "learning_rate": 5.969693908275411e-05, "loss": 0.0349, "step": 23300 }, { "epoch": 16.806056236481616, "grad_norm": 0.12119448184967041, "learning_rate": 5.9664495435561916e-05, "loss": 0.0347, "step": 23310 }, { "epoch": 16.81326604181687, "grad_norm": 0.10550698637962341, "learning_rate": 5.963204756079108e-05, "loss": 0.0286, "step": 23320 }, { "epoch": 16.820475847152128, "grad_norm": 0.11488095670938492, "learning_rate": 5.959959547263538e-05, "loss": 0.0296, "step": 23330 }, { "epoch": 16.827685652487382, "grad_norm": 0.11850009113550186, "learning_rate": 5.95671391852905e-05, "loss": 0.0343, "step": 23340 }, { "epoch": 16.83489545782264, "grad_norm": 0.20520229637622833, "learning_rate": 5.95346787129539e-05, "loss": 0.0324, "step": 23350 }, { "epoch": 16.842105263157894, "grad_norm": 0.16454793512821198, "learning_rate": 5.950221406982489e-05, "loss": 0.0287, "step": 23360 }, { "epoch": 16.84931506849315, "grad_norm": 0.11133220791816711, "learning_rate": 5.9469745270104625e-05, "loss": 0.024, "step": 23370 }, { "epoch": 16.856524873828405, "grad_norm": 0.12038256973028183, "learning_rate": 5.9437272327996064e-05, "loss": 0.0314, "step": 23380 }, { "epoch": 16.863734679163663, "grad_norm": 0.17729562520980835, "learning_rate": 5.940479525770395e-05, "loss": 0.0337, "step": 23390 }, { "epoch": 16.870944484498917, "grad_norm": 0.1130274087190628, "learning_rate": 5.9372314073434876e-05, "loss": 0.0347, "step": 23400 }, { "epoch": 16.878154289834175, "grad_norm": 0.09572986513376236, "learning_rate": 5.93398287893972e-05, "loss": 0.0283, "step": 23410 }, { "epoch": 16.88536409516943, "grad_norm": 0.16857214272022247, "learning_rate": 5.930733941980111e-05, "loss": 0.0299, "step": 23420 }, { "epoch": 16.892573900504686, "grad_norm": 0.11972349882125854, "learning_rate": 5.927484597885855e-05, "loss": 0.0283, "step": 23430 }, { "epoch": 16.899783705839944, "grad_norm": 0.1451025754213333, "learning_rate": 5.924234848078325e-05, "loss": 0.0323, "step": 23440 }, { "epoch": 16.906993511175198, "grad_norm": 0.13479968905448914, "learning_rate": 5.9209846939790715e-05, "loss": 0.029, "step": 23450 }, { "epoch": 16.914203316510456, "grad_norm": 0.1262933909893036, "learning_rate": 5.917734137009824e-05, "loss": 0.0303, "step": 23460 }, { "epoch": 16.92141312184571, "grad_norm": 0.19157904386520386, "learning_rate": 5.914483178592485e-05, "loss": 0.0361, "step": 23470 }, { "epoch": 16.928622927180967, "grad_norm": 0.14049097895622253, "learning_rate": 5.911231820149136e-05, "loss": 0.0319, "step": 23480 }, { "epoch": 16.93583273251622, "grad_norm": 0.11258235573768616, "learning_rate": 5.907980063102031e-05, "loss": 0.0361, "step": 23490 }, { "epoch": 16.94304253785148, "grad_norm": 0.1875918209552765, "learning_rate": 5.904727908873597e-05, "loss": 0.0345, "step": 23500 }, { "epoch": 16.950252343186733, "grad_norm": 0.1833016723394394, "learning_rate": 5.90147535888644e-05, "loss": 0.0354, "step": 23510 }, { "epoch": 16.95746214852199, "grad_norm": 0.15967287123203278, "learning_rate": 5.898222414563333e-05, "loss": 0.0307, "step": 23520 }, { "epoch": 16.964671953857245, "grad_norm": 0.12535834312438965, "learning_rate": 5.8949690773272256e-05, "loss": 0.035, "step": 23530 }, { "epoch": 16.971881759192502, "grad_norm": 0.11676642298698425, "learning_rate": 5.891715348601239e-05, "loss": 0.0256, "step": 23540 }, { "epoch": 16.979091564527756, "grad_norm": 0.12368649989366531, "learning_rate": 5.888461229808663e-05, "loss": 0.033, "step": 23550 }, { "epoch": 16.986301369863014, "grad_norm": 0.14354318380355835, "learning_rate": 5.885206722372959e-05, "loss": 0.0243, "step": 23560 }, { "epoch": 16.993511175198268, "grad_norm": 0.11437007784843445, "learning_rate": 5.881951827717763e-05, "loss": 0.0314, "step": 23570 }, { "epoch": 17.000720980533526, "grad_norm": 0.17818671464920044, "learning_rate": 5.878696547266872e-05, "loss": 0.0312, "step": 23580 }, { "epoch": 17.00793078586878, "grad_norm": 0.17046138644218445, "learning_rate": 5.875440882444259e-05, "loss": 0.0309, "step": 23590 }, { "epoch": 17.015140591204037, "grad_norm": 0.21200333535671234, "learning_rate": 5.8721848346740617e-05, "loss": 0.0319, "step": 23600 }, { "epoch": 17.022350396539295, "grad_norm": 0.10939223319292068, "learning_rate": 5.868928405380585e-05, "loss": 0.0367, "step": 23610 }, { "epoch": 17.02956020187455, "grad_norm": 0.16242025792598724, "learning_rate": 5.865671595988304e-05, "loss": 0.0332, "step": 23620 }, { "epoch": 17.036770007209807, "grad_norm": 0.1708575189113617, "learning_rate": 5.862414407921857e-05, "loss": 0.0361, "step": 23630 }, { "epoch": 17.04397981254506, "grad_norm": 0.12930013239383698, "learning_rate": 5.8591568426060475e-05, "loss": 0.0351, "step": 23640 }, { "epoch": 17.05118961788032, "grad_norm": 0.14328624308109283, "learning_rate": 5.8558989014658464e-05, "loss": 0.0364, "step": 23650 }, { "epoch": 17.058399423215572, "grad_norm": 0.17723439633846283, "learning_rate": 5.8526405859263855e-05, "loss": 0.0351, "step": 23660 }, { "epoch": 17.06560922855083, "grad_norm": 0.11207881569862366, "learning_rate": 5.8493818974129667e-05, "loss": 0.0324, "step": 23670 }, { "epoch": 17.072819033886084, "grad_norm": 0.11128109693527222, "learning_rate": 5.846122837351048e-05, "loss": 0.0318, "step": 23680 }, { "epoch": 17.08002883922134, "grad_norm": 0.12069778144359589, "learning_rate": 5.842863407166255e-05, "loss": 0.0308, "step": 23690 }, { "epoch": 17.087238644556596, "grad_norm": 0.17907285690307617, "learning_rate": 5.83960360828437e-05, "loss": 0.0381, "step": 23700 }, { "epoch": 17.094448449891853, "grad_norm": 0.12133955210447311, "learning_rate": 5.836343442131343e-05, "loss": 0.0317, "step": 23710 }, { "epoch": 17.101658255227107, "grad_norm": 0.10309221595525742, "learning_rate": 5.833082910133281e-05, "loss": 0.0256, "step": 23720 }, { "epoch": 17.108868060562365, "grad_norm": 0.2013472318649292, "learning_rate": 5.82982201371645e-05, "loss": 0.0319, "step": 23730 }, { "epoch": 17.11607786589762, "grad_norm": 0.1193184182047844, "learning_rate": 5.826560754307279e-05, "loss": 0.0302, "step": 23740 }, { "epoch": 17.123287671232877, "grad_norm": 0.12664711475372314, "learning_rate": 5.8232991333323516e-05, "loss": 0.029, "step": 23750 }, { "epoch": 17.130497476568134, "grad_norm": 0.17437554895877838, "learning_rate": 5.8200371522184116e-05, "loss": 0.0326, "step": 23760 }, { "epoch": 17.13770728190339, "grad_norm": 0.15007242560386658, "learning_rate": 5.816774812392362e-05, "loss": 0.0382, "step": 23770 }, { "epoch": 17.144917087238646, "grad_norm": 0.09145089983940125, "learning_rate": 5.8135121152812614e-05, "loss": 0.0259, "step": 23780 }, { "epoch": 17.1521268925739, "grad_norm": 0.1403050422668457, "learning_rate": 5.8102490623123204e-05, "loss": 0.0289, "step": 23790 }, { "epoch": 17.159336697909158, "grad_norm": 0.11564921587705612, "learning_rate": 5.806985654912914e-05, "loss": 0.0318, "step": 23800 }, { "epoch": 17.166546503244412, "grad_norm": 0.13001731038093567, "learning_rate": 5.8037218945105644e-05, "loss": 0.0317, "step": 23810 }, { "epoch": 17.17375630857967, "grad_norm": 0.13020938634872437, "learning_rate": 5.800457782532953e-05, "loss": 0.0329, "step": 23820 }, { "epoch": 17.180966113914923, "grad_norm": 0.13556373119354248, "learning_rate": 5.797193320407912e-05, "loss": 0.0303, "step": 23830 }, { "epoch": 17.18817591925018, "grad_norm": 0.20946601033210754, "learning_rate": 5.793928509563431e-05, "loss": 0.0337, "step": 23840 }, { "epoch": 17.195385724585435, "grad_norm": 0.1343556046485901, "learning_rate": 5.790663351427644e-05, "loss": 0.0283, "step": 23850 }, { "epoch": 17.202595529920693, "grad_norm": 0.13859973847866058, "learning_rate": 5.7873978474288484e-05, "loss": 0.037, "step": 23860 }, { "epoch": 17.209805335255947, "grad_norm": 0.1666380763053894, "learning_rate": 5.78413199899548e-05, "loss": 0.0359, "step": 23870 }, { "epoch": 17.217015140591204, "grad_norm": 0.19363553822040558, "learning_rate": 5.780865807556136e-05, "loss": 0.0355, "step": 23880 }, { "epoch": 17.22422494592646, "grad_norm": 0.21800212562084198, "learning_rate": 5.777599274539559e-05, "loss": 0.036, "step": 23890 }, { "epoch": 17.231434751261716, "grad_norm": 0.16205452382564545, "learning_rate": 5.77433240137464e-05, "loss": 0.0336, "step": 23900 }, { "epoch": 17.23864455659697, "grad_norm": 0.15697041153907776, "learning_rate": 5.771065189490422e-05, "loss": 0.0263, "step": 23910 }, { "epoch": 17.245854361932228, "grad_norm": 0.16562987864017487, "learning_rate": 5.767797640316095e-05, "loss": 0.0276, "step": 23920 }, { "epoch": 17.253064167267485, "grad_norm": 0.1519874632358551, "learning_rate": 5.764529755280994e-05, "loss": 0.0349, "step": 23930 }, { "epoch": 17.26027397260274, "grad_norm": 0.10552878677845001, "learning_rate": 5.761261535814603e-05, "loss": 0.0317, "step": 23940 }, { "epoch": 17.267483777937997, "grad_norm": 0.14011231064796448, "learning_rate": 5.757992983346554e-05, "loss": 0.0303, "step": 23950 }, { "epoch": 17.27469358327325, "grad_norm": 0.1486627161502838, "learning_rate": 5.754724099306621e-05, "loss": 0.0303, "step": 23960 }, { "epoch": 17.28190338860851, "grad_norm": 0.16013078391551971, "learning_rate": 5.751454885124727e-05, "loss": 0.0281, "step": 23970 }, { "epoch": 17.289113193943763, "grad_norm": 0.13065455853939056, "learning_rate": 5.748185342230935e-05, "loss": 0.0304, "step": 23980 }, { "epoch": 17.29632299927902, "grad_norm": 0.17330563068389893, "learning_rate": 5.744915472055456e-05, "loss": 0.0303, "step": 23990 }, { "epoch": 17.303532804614274, "grad_norm": 0.14036613702774048, "learning_rate": 5.741645276028642e-05, "loss": 0.0346, "step": 24000 }, { "epoch": 17.310742609949532, "grad_norm": 0.1831158697605133, "learning_rate": 5.7383747555809873e-05, "loss": 0.0339, "step": 24010 }, { "epoch": 17.317952415284786, "grad_norm": 0.20349475741386414, "learning_rate": 5.7351039121431294e-05, "loss": 0.0318, "step": 24020 }, { "epoch": 17.325162220620044, "grad_norm": 0.2071589231491089, "learning_rate": 5.7318327471458454e-05, "loss": 0.0318, "step": 24030 }, { "epoch": 17.332372025955298, "grad_norm": 0.0986667349934578, "learning_rate": 5.728561262020054e-05, "loss": 0.0281, "step": 24040 }, { "epoch": 17.339581831290555, "grad_norm": 0.13111619651317596, "learning_rate": 5.725289458196814e-05, "loss": 0.0308, "step": 24050 }, { "epoch": 17.34679163662581, "grad_norm": 0.1220373809337616, "learning_rate": 5.722017337107324e-05, "loss": 0.0297, "step": 24060 }, { "epoch": 17.354001441961067, "grad_norm": 0.14949525892734528, "learning_rate": 5.7187449001829206e-05, "loss": 0.0345, "step": 24070 }, { "epoch": 17.361211247296325, "grad_norm": 0.09812604635953903, "learning_rate": 5.715472148855081e-05, "loss": 0.0276, "step": 24080 }, { "epoch": 17.36842105263158, "grad_norm": 0.12969638407230377, "learning_rate": 5.712199084555414e-05, "loss": 0.0371, "step": 24090 }, { "epoch": 17.375630857966836, "grad_norm": 0.23893395066261292, "learning_rate": 5.7089257087156734e-05, "loss": 0.0377, "step": 24100 }, { "epoch": 17.38284066330209, "grad_norm": 0.10016059130430222, "learning_rate": 5.70565202276774e-05, "loss": 0.0276, "step": 24110 }, { "epoch": 17.390050468637348, "grad_norm": 0.1328815072774887, "learning_rate": 5.70237802814364e-05, "loss": 0.032, "step": 24120 }, { "epoch": 17.397260273972602, "grad_norm": 0.14201363921165466, "learning_rate": 5.6991037262755277e-05, "loss": 0.0321, "step": 24130 }, { "epoch": 17.40447007930786, "grad_norm": 0.13828185200691223, "learning_rate": 5.6958291185956924e-05, "loss": 0.0298, "step": 24140 }, { "epoch": 17.411679884643114, "grad_norm": 0.18836331367492676, "learning_rate": 5.692554206536561e-05, "loss": 0.032, "step": 24150 }, { "epoch": 17.41888968997837, "grad_norm": 0.14903950691223145, "learning_rate": 5.689278991530692e-05, "loss": 0.0293, "step": 24160 }, { "epoch": 17.426099495313625, "grad_norm": 0.13755828142166138, "learning_rate": 5.686003475010773e-05, "loss": 0.0323, "step": 24170 }, { "epoch": 17.433309300648883, "grad_norm": 0.19819375872612, "learning_rate": 5.682727658409628e-05, "loss": 0.0305, "step": 24180 }, { "epoch": 17.440519105984137, "grad_norm": 0.06978776305913925, "learning_rate": 5.67945154316021e-05, "loss": 0.0295, "step": 24190 }, { "epoch": 17.447728911319395, "grad_norm": 0.12199114263057709, "learning_rate": 5.676175130695602e-05, "loss": 0.0346, "step": 24200 }, { "epoch": 17.45493871665465, "grad_norm": 0.15495041012763977, "learning_rate": 5.672898422449022e-05, "loss": 0.031, "step": 24210 }, { "epoch": 17.462148521989906, "grad_norm": 0.12383783608675003, "learning_rate": 5.66962141985381e-05, "loss": 0.0295, "step": 24220 }, { "epoch": 17.46935832732516, "grad_norm": 0.14290916919708252, "learning_rate": 5.666344124343439e-05, "loss": 0.0332, "step": 24230 }, { "epoch": 17.476568132660418, "grad_norm": 0.11903891712427139, "learning_rate": 5.66306653735151e-05, "loss": 0.0301, "step": 24240 }, { "epoch": 17.483777937995676, "grad_norm": 0.1558760106563568, "learning_rate": 5.659788660311749e-05, "loss": 0.0347, "step": 24250 }, { "epoch": 17.49098774333093, "grad_norm": 0.14581021666526794, "learning_rate": 5.656510494658014e-05, "loss": 0.0307, "step": 24260 }, { "epoch": 17.498197548666187, "grad_norm": 0.17625541985034943, "learning_rate": 5.6532320418242835e-05, "loss": 0.0298, "step": 24270 }, { "epoch": 17.50540735400144, "grad_norm": 0.14119957387447357, "learning_rate": 5.649953303244665e-05, "loss": 0.0335, "step": 24280 }, { "epoch": 17.5126171593367, "grad_norm": 0.14995576441287994, "learning_rate": 5.646674280353389e-05, "loss": 0.0284, "step": 24290 }, { "epoch": 17.519826964671953, "grad_norm": 0.08949552476406097, "learning_rate": 5.6433949745848135e-05, "loss": 0.0282, "step": 24300 }, { "epoch": 17.52703677000721, "grad_norm": 0.18132707476615906, "learning_rate": 5.6401153873734166e-05, "loss": 0.0312, "step": 24310 }, { "epoch": 17.534246575342465, "grad_norm": 0.14598366618156433, "learning_rate": 5.6368355201538036e-05, "loss": 0.0296, "step": 24320 }, { "epoch": 17.541456380677722, "grad_norm": 0.14530640840530396, "learning_rate": 5.633555374360697e-05, "loss": 0.0336, "step": 24330 }, { "epoch": 17.548666186012976, "grad_norm": 0.16872717440128326, "learning_rate": 5.630274951428944e-05, "loss": 0.0325, "step": 24340 }, { "epoch": 17.555875991348234, "grad_norm": 0.11569187045097351, "learning_rate": 5.626994252793517e-05, "loss": 0.0266, "step": 24350 }, { "epoch": 17.563085796683488, "grad_norm": 0.17623834311962128, "learning_rate": 5.6237132798895006e-05, "loss": 0.0303, "step": 24360 }, { "epoch": 17.570295602018746, "grad_norm": 0.1539490669965744, "learning_rate": 5.620432034152106e-05, "loss": 0.0273, "step": 24370 }, { "epoch": 17.577505407354, "grad_norm": 0.10164433717727661, "learning_rate": 5.6171505170166604e-05, "loss": 0.0306, "step": 24380 }, { "epoch": 17.584715212689257, "grad_norm": 0.16002503037452698, "learning_rate": 5.613868729918612e-05, "loss": 0.0295, "step": 24390 }, { "epoch": 17.591925018024515, "grad_norm": 0.19850283861160278, "learning_rate": 5.6105866742935256e-05, "loss": 0.0361, "step": 24400 }, { "epoch": 17.59913482335977, "grad_norm": 0.1435423493385315, "learning_rate": 5.607304351577083e-05, "loss": 0.0294, "step": 24410 }, { "epoch": 17.606344628695027, "grad_norm": 0.14328619837760925, "learning_rate": 5.604021763205085e-05, "loss": 0.0295, "step": 24420 }, { "epoch": 17.61355443403028, "grad_norm": 0.12783965468406677, "learning_rate": 5.600738910613445e-05, "loss": 0.0312, "step": 24430 }, { "epoch": 17.62076423936554, "grad_norm": 0.10394157469272614, "learning_rate": 5.597455795238198e-05, "loss": 0.033, "step": 24440 }, { "epoch": 17.627974044700792, "grad_norm": 0.12731869518756866, "learning_rate": 5.5941724185154876e-05, "loss": 0.0292, "step": 24450 }, { "epoch": 17.63518385003605, "grad_norm": 0.182417094707489, "learning_rate": 5.5908887818815746e-05, "loss": 0.0317, "step": 24460 }, { "epoch": 17.642393655371304, "grad_norm": 0.1207175925374031, "learning_rate": 5.587604886772834e-05, "loss": 0.0322, "step": 24470 }, { "epoch": 17.64960346070656, "grad_norm": 0.15433235466480255, "learning_rate": 5.584320734625752e-05, "loss": 0.035, "step": 24480 }, { "epoch": 17.656813266041816, "grad_norm": 0.19119498133659363, "learning_rate": 5.581036326876928e-05, "loss": 0.0287, "step": 24490 }, { "epoch": 17.664023071377073, "grad_norm": 0.15168102085590363, "learning_rate": 5.5777516649630756e-05, "loss": 0.0285, "step": 24500 }, { "epoch": 17.671232876712327, "grad_norm": 0.11124053597450256, "learning_rate": 5.574466750321017e-05, "loss": 0.0248, "step": 24510 }, { "epoch": 17.678442682047585, "grad_norm": 0.1595943421125412, "learning_rate": 5.571181584387683e-05, "loss": 0.0277, "step": 24520 }, { "epoch": 17.68565248738284, "grad_norm": 0.18671803176403046, "learning_rate": 5.5678961686001196e-05, "loss": 0.0345, "step": 24530 }, { "epoch": 17.692862292718097, "grad_norm": 0.1353497952222824, "learning_rate": 5.5646105043954785e-05, "loss": 0.0325, "step": 24540 }, { "epoch": 17.700072098053354, "grad_norm": 0.12571635842323303, "learning_rate": 5.5613245932110205e-05, "loss": 0.0326, "step": 24550 }, { "epoch": 17.70728190338861, "grad_norm": 0.12409773468971252, "learning_rate": 5.5580384364841157e-05, "loss": 0.0295, "step": 24560 }, { "epoch": 17.714491708723866, "grad_norm": 0.12863631546497345, "learning_rate": 5.554752035652237e-05, "loss": 0.0261, "step": 24570 }, { "epoch": 17.72170151405912, "grad_norm": 0.12591296434402466, "learning_rate": 5.5514653921529736e-05, "loss": 0.0307, "step": 24580 }, { "epoch": 17.728911319394378, "grad_norm": 0.15741713345050812, "learning_rate": 5.5481785074240114e-05, "loss": 0.0262, "step": 24590 }, { "epoch": 17.73612112472963, "grad_norm": 0.19006560742855072, "learning_rate": 5.544891382903146e-05, "loss": 0.0344, "step": 24600 }, { "epoch": 17.74333093006489, "grad_norm": 0.15287549793720245, "learning_rate": 5.541604020028277e-05, "loss": 0.0341, "step": 24610 }, { "epoch": 17.750540735400143, "grad_norm": 0.10550843179225922, "learning_rate": 5.5383164202374105e-05, "loss": 0.0305, "step": 24620 }, { "epoch": 17.7577505407354, "grad_norm": 0.17928220331668854, "learning_rate": 5.5350285849686515e-05, "loss": 0.0358, "step": 24630 }, { "epoch": 17.764960346070655, "grad_norm": 0.1009732112288475, "learning_rate": 5.531740515660213e-05, "loss": 0.0304, "step": 24640 }, { "epoch": 17.772170151405913, "grad_norm": 0.15464556217193604, "learning_rate": 5.528452213750408e-05, "loss": 0.0322, "step": 24650 }, { "epoch": 17.779379956741167, "grad_norm": 0.1302526444196701, "learning_rate": 5.52516368067765e-05, "loss": 0.0269, "step": 24660 }, { "epoch": 17.786589762076424, "grad_norm": 0.12913112342357635, "learning_rate": 5.5218749178804563e-05, "loss": 0.0259, "step": 24670 }, { "epoch": 17.79379956741168, "grad_norm": 0.15199950337409973, "learning_rate": 5.518585926797443e-05, "loss": 0.0341, "step": 24680 }, { "epoch": 17.801009372746936, "grad_norm": 0.2026185542345047, "learning_rate": 5.5152967088673265e-05, "loss": 0.0309, "step": 24690 }, { "epoch": 17.80821917808219, "grad_norm": 0.11650130152702332, "learning_rate": 5.512007265528924e-05, "loss": 0.0337, "step": 24700 }, { "epoch": 17.815428983417448, "grad_norm": 0.13345882296562195, "learning_rate": 5.508717598221148e-05, "loss": 0.0309, "step": 24710 }, { "epoch": 17.822638788752705, "grad_norm": 0.15238292515277863, "learning_rate": 5.505427708383012e-05, "loss": 0.0309, "step": 24720 }, { "epoch": 17.82984859408796, "grad_norm": 0.15604010224342346, "learning_rate": 5.502137597453624e-05, "loss": 0.03, "step": 24730 }, { "epoch": 17.837058399423217, "grad_norm": 0.1458672434091568, "learning_rate": 5.4988472668721914e-05, "loss": 0.0317, "step": 24740 }, { "epoch": 17.84426820475847, "grad_norm": 0.1477522999048233, "learning_rate": 5.4955567180780164e-05, "loss": 0.0225, "step": 24750 }, { "epoch": 17.85147801009373, "grad_norm": 0.16762779653072357, "learning_rate": 5.492265952510497e-05, "loss": 0.034, "step": 24760 }, { "epoch": 17.858687815428983, "grad_norm": 0.10680843144655228, "learning_rate": 5.4889749716091254e-05, "loss": 0.0267, "step": 24770 }, { "epoch": 17.86589762076424, "grad_norm": 0.11966589093208313, "learning_rate": 5.4856837768134885e-05, "loss": 0.0272, "step": 24780 }, { "epoch": 17.873107426099494, "grad_norm": 0.1203431487083435, "learning_rate": 5.482392369563267e-05, "loss": 0.0308, "step": 24790 }, { "epoch": 17.880317231434752, "grad_norm": 0.160793274641037, "learning_rate": 5.4791007512982354e-05, "loss": 0.0311, "step": 24800 }, { "epoch": 17.887527036770006, "grad_norm": 0.16827097535133362, "learning_rate": 5.4758089234582566e-05, "loss": 0.0331, "step": 24810 }, { "epoch": 17.894736842105264, "grad_norm": 0.1271570324897766, "learning_rate": 5.47251688748329e-05, "loss": 0.0291, "step": 24820 }, { "epoch": 17.901946647440518, "grad_norm": 0.15432032942771912, "learning_rate": 5.4692246448133843e-05, "loss": 0.0259, "step": 24830 }, { "epoch": 17.909156452775775, "grad_norm": 0.1672380417585373, "learning_rate": 5.465932196888676e-05, "loss": 0.0287, "step": 24840 }, { "epoch": 17.91636625811103, "grad_norm": 0.1512354463338852, "learning_rate": 5.462639545149395e-05, "loss": 0.0285, "step": 24850 }, { "epoch": 17.923576063446287, "grad_norm": 0.1076057106256485, "learning_rate": 5.459346691035861e-05, "loss": 0.0365, "step": 24860 }, { "epoch": 17.93078586878154, "grad_norm": 0.1539268046617508, "learning_rate": 5.4560536359884776e-05, "loss": 0.033, "step": 24870 }, { "epoch": 17.9379956741168, "grad_norm": 0.16675838828086853, "learning_rate": 5.45276038144774e-05, "loss": 0.0248, "step": 24880 }, { "epoch": 17.945205479452056, "grad_norm": 0.14504559338092804, "learning_rate": 5.449466928854229e-05, "loss": 0.0304, "step": 24890 }, { "epoch": 17.95241528478731, "grad_norm": 0.17771735787391663, "learning_rate": 5.446173279648613e-05, "loss": 0.0286, "step": 24900 }, { "epoch": 17.959625090122568, "grad_norm": 0.14424186944961548, "learning_rate": 5.442879435271646e-05, "loss": 0.0279, "step": 24910 }, { "epoch": 17.966834895457822, "grad_norm": 0.15009483695030212, "learning_rate": 5.4395853971641675e-05, "loss": 0.0303, "step": 24920 }, { "epoch": 17.97404470079308, "grad_norm": 0.08759662508964539, "learning_rate": 5.4362911667671015e-05, "loss": 0.0251, "step": 24930 }, { "epoch": 17.981254506128334, "grad_norm": 0.1049162894487381, "learning_rate": 5.432996745521458e-05, "loss": 0.0296, "step": 24940 }, { "epoch": 17.98846431146359, "grad_norm": 0.1587454080581665, "learning_rate": 5.4297021348683264e-05, "loss": 0.0359, "step": 24950 }, { "epoch": 17.995674116798845, "grad_norm": 0.11668020486831665, "learning_rate": 5.426407336248882e-05, "loss": 0.0283, "step": 24960 }, { "epoch": 18.002883922134103, "grad_norm": 0.1256023794412613, "learning_rate": 5.423112351104382e-05, "loss": 0.0317, "step": 24970 }, { "epoch": 18.010093727469357, "grad_norm": 0.18339644372463226, "learning_rate": 5.4198171808761654e-05, "loss": 0.0382, "step": 24980 }, { "epoch": 18.017303532804615, "grad_norm": 0.11902455985546112, "learning_rate": 5.41652182700565e-05, "loss": 0.03, "step": 24990 }, { "epoch": 18.02451333813987, "grad_norm": 0.19561421871185303, "learning_rate": 5.413226290934338e-05, "loss": 0.0285, "step": 25000 }, { "epoch": 18.031723143475126, "grad_norm": 0.1285843849182129, "learning_rate": 5.4099305741038056e-05, "loss": 0.0317, "step": 25010 }, { "epoch": 18.03893294881038, "grad_norm": 0.14393332600593567, "learning_rate": 5.406634677955713e-05, "loss": 0.0359, "step": 25020 }, { "epoch": 18.046142754145638, "grad_norm": 0.10006286203861237, "learning_rate": 5.403338603931798e-05, "loss": 0.0281, "step": 25030 }, { "epoch": 18.053352559480896, "grad_norm": 0.14097654819488525, "learning_rate": 5.4000423534738745e-05, "loss": 0.0262, "step": 25040 }, { "epoch": 18.06056236481615, "grad_norm": 0.14502070844173431, "learning_rate": 5.396745928023835e-05, "loss": 0.0246, "step": 25050 }, { "epoch": 18.067772170151407, "grad_norm": 0.15204285085201263, "learning_rate": 5.3934493290236474e-05, "loss": 0.0321, "step": 25060 }, { "epoch": 18.07498197548666, "grad_norm": 0.11069481819868088, "learning_rate": 5.390152557915357e-05, "loss": 0.0297, "step": 25070 }, { "epoch": 18.08219178082192, "grad_norm": 0.13950230181217194, "learning_rate": 5.3868556161410796e-05, "loss": 0.0343, "step": 25080 }, { "epoch": 18.089401586157173, "grad_norm": 0.1102912575006485, "learning_rate": 5.383558505143015e-05, "loss": 0.0274, "step": 25090 }, { "epoch": 18.09661139149243, "grad_norm": 0.1158810704946518, "learning_rate": 5.38026122636343e-05, "loss": 0.029, "step": 25100 }, { "epoch": 18.103821196827685, "grad_norm": 0.16805918514728546, "learning_rate": 5.3769637812446636e-05, "loss": 0.0316, "step": 25110 }, { "epoch": 18.111031002162942, "grad_norm": 0.15952564775943756, "learning_rate": 5.373666171229133e-05, "loss": 0.034, "step": 25120 }, { "epoch": 18.118240807498196, "grad_norm": 0.13482041656970978, "learning_rate": 5.370368397759324e-05, "loss": 0.0341, "step": 25130 }, { "epoch": 18.125450612833454, "grad_norm": 0.15842951834201813, "learning_rate": 5.3670704622777944e-05, "loss": 0.0292, "step": 25140 }, { "epoch": 18.132660418168708, "grad_norm": 0.13583166897296906, "learning_rate": 5.3637723662271744e-05, "loss": 0.0288, "step": 25150 }, { "epoch": 18.139870223503966, "grad_norm": 0.1837039738893509, "learning_rate": 5.360474111050162e-05, "loss": 0.0361, "step": 25160 }, { "epoch": 18.14708002883922, "grad_norm": 0.20056524872779846, "learning_rate": 5.357175698189527e-05, "loss": 0.0297, "step": 25170 }, { "epoch": 18.154289834174477, "grad_norm": 0.12054767459630966, "learning_rate": 5.353877129088107e-05, "loss": 0.0273, "step": 25180 }, { "epoch": 18.161499639509735, "grad_norm": 0.10726016014814377, "learning_rate": 5.350578405188809e-05, "loss": 0.0303, "step": 25190 }, { "epoch": 18.16870944484499, "grad_norm": 0.08304845541715622, "learning_rate": 5.347279527934603e-05, "loss": 0.0257, "step": 25200 }, { "epoch": 18.175919250180247, "grad_norm": 0.1356649100780487, "learning_rate": 5.343980498768536e-05, "loss": 0.0304, "step": 25210 }, { "epoch": 18.1831290555155, "grad_norm": 0.13434931635856628, "learning_rate": 5.340681319133711e-05, "loss": 0.0241, "step": 25220 }, { "epoch": 18.19033886085076, "grad_norm": 0.15707279741764069, "learning_rate": 5.337381990473305e-05, "loss": 0.0272, "step": 25230 }, { "epoch": 18.197548666186012, "grad_norm": 0.12649406492710114, "learning_rate": 5.3340825142305504e-05, "loss": 0.0286, "step": 25240 }, { "epoch": 18.20475847152127, "grad_norm": 0.1127309799194336, "learning_rate": 5.330782891848756e-05, "loss": 0.0287, "step": 25250 }, { "epoch": 18.211968276856524, "grad_norm": 0.117633156478405, "learning_rate": 5.327483124771285e-05, "loss": 0.0345, "step": 25260 }, { "epoch": 18.21917808219178, "grad_norm": 0.11088218539953232, "learning_rate": 5.3241832144415706e-05, "loss": 0.0286, "step": 25270 }, { "epoch": 18.226387887527036, "grad_norm": 0.13844230771064758, "learning_rate": 5.320883162303104e-05, "loss": 0.0317, "step": 25280 }, { "epoch": 18.233597692862293, "grad_norm": 0.1830912083387375, "learning_rate": 5.3175829697994416e-05, "loss": 0.0337, "step": 25290 }, { "epoch": 18.240807498197547, "grad_norm": 0.13575606048107147, "learning_rate": 5.3142826383741975e-05, "loss": 0.0285, "step": 25300 }, { "epoch": 18.248017303532805, "grad_norm": 0.10786038637161255, "learning_rate": 5.3109821694710506e-05, "loss": 0.0311, "step": 25310 }, { "epoch": 18.25522710886806, "grad_norm": 0.10602555423974991, "learning_rate": 5.307681564533736e-05, "loss": 0.031, "step": 25320 }, { "epoch": 18.262436914203317, "grad_norm": 0.11303319782018661, "learning_rate": 5.3043808250060535e-05, "loss": 0.0279, "step": 25330 }, { "epoch": 18.26964671953857, "grad_norm": 0.1079239696264267, "learning_rate": 5.3010799523318566e-05, "loss": 0.0311, "step": 25340 }, { "epoch": 18.27685652487383, "grad_norm": 0.11588410288095474, "learning_rate": 5.29777894795506e-05, "loss": 0.027, "step": 25350 }, { "epoch": 18.284066330209086, "grad_norm": 0.17776617407798767, "learning_rate": 5.294477813319634e-05, "loss": 0.032, "step": 25360 }, { "epoch": 18.29127613554434, "grad_norm": 0.11681731045246124, "learning_rate": 5.291176549869608e-05, "loss": 0.0257, "step": 25370 }, { "epoch": 18.298485940879598, "grad_norm": 0.1113877221941948, "learning_rate": 5.287875159049065e-05, "loss": 0.0365, "step": 25380 }, { "epoch": 18.30569574621485, "grad_norm": 0.09708087891340256, "learning_rate": 5.284573642302149e-05, "loss": 0.0312, "step": 25390 }, { "epoch": 18.31290555155011, "grad_norm": 0.12482307106256485, "learning_rate": 5.2812720010730543e-05, "loss": 0.032, "step": 25400 }, { "epoch": 18.320115356885363, "grad_norm": 0.15593719482421875, "learning_rate": 5.277970236806029e-05, "loss": 0.0272, "step": 25410 }, { "epoch": 18.32732516222062, "grad_norm": 0.12572187185287476, "learning_rate": 5.2746683509453795e-05, "loss": 0.0304, "step": 25420 }, { "epoch": 18.334534967555875, "grad_norm": 0.1734185516834259, "learning_rate": 5.271366344935461e-05, "loss": 0.0304, "step": 25430 }, { "epoch": 18.341744772891133, "grad_norm": 0.13693930208683014, "learning_rate": 5.268064220220683e-05, "loss": 0.0306, "step": 25440 }, { "epoch": 18.348954578226387, "grad_norm": 0.1095038428902626, "learning_rate": 5.2647619782455094e-05, "loss": 0.0358, "step": 25450 }, { "epoch": 18.356164383561644, "grad_norm": 0.11240819096565247, "learning_rate": 5.261459620454451e-05, "loss": 0.0296, "step": 25460 }, { "epoch": 18.3633741888969, "grad_norm": 0.14043673872947693, "learning_rate": 5.258157148292071e-05, "loss": 0.0321, "step": 25470 }, { "epoch": 18.370583994232156, "grad_norm": 0.13644036650657654, "learning_rate": 5.2548545632029846e-05, "loss": 0.0282, "step": 25480 }, { "epoch": 18.37779379956741, "grad_norm": 0.20041914284229279, "learning_rate": 5.2515518666318534e-05, "loss": 0.0273, "step": 25490 }, { "epoch": 18.385003604902668, "grad_norm": 0.11715291440486908, "learning_rate": 5.248249060023389e-05, "loss": 0.0265, "step": 25500 }, { "epoch": 18.392213410237922, "grad_norm": 0.1528826206922531, "learning_rate": 5.2449461448223517e-05, "loss": 0.0306, "step": 25510 }, { "epoch": 18.39942321557318, "grad_norm": 0.1596190482378006, "learning_rate": 5.2416431224735486e-05, "loss": 0.0349, "step": 25520 }, { "epoch": 18.406633020908437, "grad_norm": 0.15967831015586853, "learning_rate": 5.238339994421836e-05, "loss": 0.0343, "step": 25530 }, { "epoch": 18.41384282624369, "grad_norm": 0.128269761800766, "learning_rate": 5.235036762112111e-05, "loss": 0.0329, "step": 25540 }, { "epoch": 18.42105263157895, "grad_norm": 0.12943367660045624, "learning_rate": 5.2317334269893206e-05, "loss": 0.0269, "step": 25550 }, { "epoch": 18.428262436914203, "grad_norm": 0.15065675973892212, "learning_rate": 5.228429990498458e-05, "loss": 0.0275, "step": 25560 }, { "epoch": 18.43547224224946, "grad_norm": 0.13144417107105255, "learning_rate": 5.225126454084556e-05, "loss": 0.0318, "step": 25570 }, { "epoch": 18.442682047584714, "grad_norm": 0.13027626276016235, "learning_rate": 5.221822819192694e-05, "loss": 0.0394, "step": 25580 }, { "epoch": 18.449891852919972, "grad_norm": 0.10896896570920944, "learning_rate": 5.218519087267995e-05, "loss": 0.0311, "step": 25590 }, { "epoch": 18.457101658255226, "grad_norm": 0.18281549215316772, "learning_rate": 5.2152152597556225e-05, "loss": 0.028, "step": 25600 }, { "epoch": 18.464311463590484, "grad_norm": 0.12873300909996033, "learning_rate": 5.211911338100784e-05, "loss": 0.0295, "step": 25610 }, { "epoch": 18.471521268925738, "grad_norm": 0.1637636423110962, "learning_rate": 5.2086073237487264e-05, "loss": 0.0311, "step": 25620 }, { "epoch": 18.478731074260995, "grad_norm": 0.1533752828836441, "learning_rate": 5.205303218144739e-05, "loss": 0.0292, "step": 25630 }, { "epoch": 18.48594087959625, "grad_norm": 0.12283046543598175, "learning_rate": 5.2019990227341494e-05, "loss": 0.0308, "step": 25640 }, { "epoch": 18.493150684931507, "grad_norm": 0.13929176330566406, "learning_rate": 5.198694738962324e-05, "loss": 0.0285, "step": 25650 }, { "epoch": 18.50036049026676, "grad_norm": 0.13564297556877136, "learning_rate": 5.19539036827467e-05, "loss": 0.0257, "step": 25660 }, { "epoch": 18.50757029560202, "grad_norm": 0.12946154177188873, "learning_rate": 5.192085912116629e-05, "loss": 0.028, "step": 25670 }, { "epoch": 18.514780100937276, "grad_norm": 0.15085981786251068, "learning_rate": 5.188781371933685e-05, "loss": 0.0281, "step": 25680 }, { "epoch": 18.52198990627253, "grad_norm": 0.17813904583454132, "learning_rate": 5.185476749171356e-05, "loss": 0.0282, "step": 25690 }, { "epoch": 18.529199711607788, "grad_norm": 0.14467908442020416, "learning_rate": 5.1821720452751945e-05, "loss": 0.031, "step": 25700 }, { "epoch": 18.536409516943042, "grad_norm": 0.16225102543830872, "learning_rate": 5.178867261690791e-05, "loss": 0.0327, "step": 25710 }, { "epoch": 18.5436193222783, "grad_norm": 0.12698188424110413, "learning_rate": 5.1755623998637715e-05, "loss": 0.0307, "step": 25720 }, { "epoch": 18.550829127613554, "grad_norm": 0.14586246013641357, "learning_rate": 5.172257461239794e-05, "loss": 0.0285, "step": 25730 }, { "epoch": 18.55803893294881, "grad_norm": 0.10928209125995636, "learning_rate": 5.168952447264548e-05, "loss": 0.0295, "step": 25740 }, { "epoch": 18.565248738284065, "grad_norm": 0.1302354633808136, "learning_rate": 5.165647359383763e-05, "loss": 0.0302, "step": 25750 }, { "epoch": 18.572458543619323, "grad_norm": 0.20719420909881592, "learning_rate": 5.1623421990431954e-05, "loss": 0.0261, "step": 25760 }, { "epoch": 18.579668348954577, "grad_norm": 0.1544523984193802, "learning_rate": 5.159036967688635e-05, "loss": 0.0315, "step": 25770 }, { "epoch": 18.586878154289835, "grad_norm": 0.15542985498905182, "learning_rate": 5.1557316667659015e-05, "loss": 0.0262, "step": 25780 }, { "epoch": 18.59408795962509, "grad_norm": 0.10317595303058624, "learning_rate": 5.152426297720846e-05, "loss": 0.03, "step": 25790 }, { "epoch": 18.601297764960346, "grad_norm": 0.14390653371810913, "learning_rate": 5.1491208619993505e-05, "loss": 0.0319, "step": 25800 }, { "epoch": 18.6085075702956, "grad_norm": 0.12334244698286057, "learning_rate": 5.145815361047321e-05, "loss": 0.029, "step": 25810 }, { "epoch": 18.615717375630858, "grad_norm": 0.1380155235528946, "learning_rate": 5.1425097963107003e-05, "loss": 0.0263, "step": 25820 }, { "epoch": 18.622927180966116, "grad_norm": 0.14699901640415192, "learning_rate": 5.1392041692354544e-05, "loss": 0.0266, "step": 25830 }, { "epoch": 18.63013698630137, "grad_norm": 0.10947633534669876, "learning_rate": 5.1358984812675736e-05, "loss": 0.0271, "step": 25840 }, { "epoch": 18.637346791636627, "grad_norm": 0.160508930683136, "learning_rate": 5.13259273385308e-05, "loss": 0.0323, "step": 25850 }, { "epoch": 18.64455659697188, "grad_norm": 0.10282903164625168, "learning_rate": 5.12928692843802e-05, "loss": 0.0353, "step": 25860 }, { "epoch": 18.65176640230714, "grad_norm": 0.1212688684463501, "learning_rate": 5.125981066468465e-05, "loss": 0.0336, "step": 25870 }, { "epoch": 18.658976207642393, "grad_norm": 0.15819308161735535, "learning_rate": 5.122675149390514e-05, "loss": 0.0263, "step": 25880 }, { "epoch": 18.66618601297765, "grad_norm": 0.18593887984752655, "learning_rate": 5.119369178650282e-05, "loss": 0.0307, "step": 25890 }, { "epoch": 18.673395818312905, "grad_norm": 0.15453431010246277, "learning_rate": 5.1160631556939156e-05, "loss": 0.0331, "step": 25900 }, { "epoch": 18.680605623648162, "grad_norm": 0.1006292998790741, "learning_rate": 5.112757081967584e-05, "loss": 0.026, "step": 25910 }, { "epoch": 18.687815428983416, "grad_norm": 0.11358613520860672, "learning_rate": 5.109450958917472e-05, "loss": 0.0365, "step": 25920 }, { "epoch": 18.695025234318674, "grad_norm": 0.14815686643123627, "learning_rate": 5.106144787989794e-05, "loss": 0.0296, "step": 25930 }, { "epoch": 18.702235039653928, "grad_norm": 0.14451530575752258, "learning_rate": 5.102838570630778e-05, "loss": 0.0265, "step": 25940 }, { "epoch": 18.709444844989186, "grad_norm": 0.15263262391090393, "learning_rate": 5.099532308286678e-05, "loss": 0.0279, "step": 25950 }, { "epoch": 18.71665465032444, "grad_norm": 0.137022465467453, "learning_rate": 5.0962260024037645e-05, "loss": 0.0316, "step": 25960 }, { "epoch": 18.723864455659697, "grad_norm": 0.1787310391664505, "learning_rate": 5.0929196544283276e-05, "loss": 0.0303, "step": 25970 }, { "epoch": 18.73107426099495, "grad_norm": 0.14725348353385925, "learning_rate": 5.089613265806678e-05, "loss": 0.0337, "step": 25980 }, { "epoch": 18.73828406633021, "grad_norm": 0.1626242846250534, "learning_rate": 5.086306837985141e-05, "loss": 0.0262, "step": 25990 }, { "epoch": 18.745493871665467, "grad_norm": 0.1366136372089386, "learning_rate": 5.0830003724100604e-05, "loss": 0.0368, "step": 26000 }, { "epoch": 18.75270367700072, "grad_norm": 0.12205162644386292, "learning_rate": 5.0796938705277984e-05, "loss": 0.0315, "step": 26010 }, { "epoch": 18.75991348233598, "grad_norm": 0.1006176620721817, "learning_rate": 5.076387333784729e-05, "loss": 0.0299, "step": 26020 }, { "epoch": 18.767123287671232, "grad_norm": 0.1021302118897438, "learning_rate": 5.0730807636272445e-05, "loss": 0.0267, "step": 26030 }, { "epoch": 18.77433309300649, "grad_norm": 0.13279373943805695, "learning_rate": 5.0697741615017524e-05, "loss": 0.0323, "step": 26040 }, { "epoch": 18.781542898341744, "grad_norm": 0.18203923106193542, "learning_rate": 5.06646752885467e-05, "loss": 0.0278, "step": 26050 }, { "epoch": 18.788752703677, "grad_norm": 0.10597492754459381, "learning_rate": 5.0631608671324324e-05, "loss": 0.0248, "step": 26060 }, { "epoch": 18.795962509012256, "grad_norm": 0.13714614510536194, "learning_rate": 5.059854177781487e-05, "loss": 0.0272, "step": 26070 }, { "epoch": 18.803172314347513, "grad_norm": 0.12078418582677841, "learning_rate": 5.05654746224829e-05, "loss": 0.029, "step": 26080 }, { "epoch": 18.810382119682767, "grad_norm": 0.17023798823356628, "learning_rate": 5.053240721979311e-05, "loss": 0.0324, "step": 26090 }, { "epoch": 18.817591925018025, "grad_norm": 0.156827911734581, "learning_rate": 5.049933958421031e-05, "loss": 0.0266, "step": 26100 }, { "epoch": 18.82480173035328, "grad_norm": 0.14070874452590942, "learning_rate": 5.046627173019943e-05, "loss": 0.0341, "step": 26110 }, { "epoch": 18.832011535688537, "grad_norm": 0.15782777965068817, "learning_rate": 5.043320367222545e-05, "loss": 0.0315, "step": 26120 }, { "epoch": 18.83922134102379, "grad_norm": 0.20399820804595947, "learning_rate": 5.040013542475346e-05, "loss": 0.0302, "step": 26130 }, { "epoch": 18.84643114635905, "grad_norm": 0.12327463924884796, "learning_rate": 5.0367067002248646e-05, "loss": 0.0311, "step": 26140 }, { "epoch": 18.853640951694302, "grad_norm": 0.15278375148773193, "learning_rate": 5.0333998419176254e-05, "loss": 0.0288, "step": 26150 }, { "epoch": 18.86085075702956, "grad_norm": 0.15271896123886108, "learning_rate": 5.030092969000161e-05, "loss": 0.0356, "step": 26160 }, { "epoch": 18.868060562364818, "grad_norm": 0.21339169144630432, "learning_rate": 5.026786082919008e-05, "loss": 0.0345, "step": 26170 }, { "epoch": 18.87527036770007, "grad_norm": 0.16706018149852753, "learning_rate": 5.0234791851207144e-05, "loss": 0.0287, "step": 26180 }, { "epoch": 18.88248017303533, "grad_norm": 0.14517024159431458, "learning_rate": 5.0201722770518256e-05, "loss": 0.0373, "step": 26190 }, { "epoch": 18.889689978370583, "grad_norm": 0.1516100913286209, "learning_rate": 5.0168653601589e-05, "loss": 0.03, "step": 26200 }, { "epoch": 18.89689978370584, "grad_norm": 0.15063682198524475, "learning_rate": 5.01355843588849e-05, "loss": 0.029, "step": 26210 }, { "epoch": 18.904109589041095, "grad_norm": 0.1474195271730423, "learning_rate": 5.010251505687161e-05, "loss": 0.033, "step": 26220 }, { "epoch": 18.911319394376353, "grad_norm": 0.1131487488746643, "learning_rate": 5.006944571001474e-05, "loss": 0.0305, "step": 26230 }, { "epoch": 18.918529199711607, "grad_norm": 0.1670733094215393, "learning_rate": 5.003637633277997e-05, "loss": 0.0295, "step": 26240 }, { "epoch": 18.925739005046864, "grad_norm": 0.14364007115364075, "learning_rate": 5.0003306939632946e-05, "loss": 0.0298, "step": 26250 }, { "epoch": 18.93294881038212, "grad_norm": 0.12552489340305328, "learning_rate": 4.997023754503937e-05, "loss": 0.0283, "step": 26260 }, { "epoch": 18.940158615717376, "grad_norm": 0.1828671097755432, "learning_rate": 4.9937168163464897e-05, "loss": 0.031, "step": 26270 }, { "epoch": 18.94736842105263, "grad_norm": 0.15859761834144592, "learning_rate": 4.990409880937519e-05, "loss": 0.0315, "step": 26280 }, { "epoch": 18.954578226387888, "grad_norm": 0.22267115116119385, "learning_rate": 4.9871029497235936e-05, "loss": 0.0277, "step": 26290 }, { "epoch": 18.96178803172314, "grad_norm": 0.1293202042579651, "learning_rate": 4.9837960241512766e-05, "loss": 0.0319, "step": 26300 }, { "epoch": 18.9689978370584, "grad_norm": 0.10727077722549438, "learning_rate": 4.9804891056671265e-05, "loss": 0.0286, "step": 26310 }, { "epoch": 18.976207642393657, "grad_norm": 0.1496768593788147, "learning_rate": 4.9771821957177074e-05, "loss": 0.0337, "step": 26320 }, { "epoch": 18.98341744772891, "grad_norm": 0.12459046393632889, "learning_rate": 4.9738752957495694e-05, "loss": 0.0304, "step": 26330 }, { "epoch": 18.99062725306417, "grad_norm": 0.12764710187911987, "learning_rate": 4.970568407209265e-05, "loss": 0.0322, "step": 26340 }, { "epoch": 18.997837058399423, "grad_norm": 0.15401719510555267, "learning_rate": 4.967261531543339e-05, "loss": 0.0329, "step": 26350 }, { "epoch": 19.00504686373468, "grad_norm": 0.1791287660598755, "learning_rate": 4.963954670198332e-05, "loss": 0.0361, "step": 26360 }, { "epoch": 19.012256669069934, "grad_norm": 0.17295777797698975, "learning_rate": 4.960647824620776e-05, "loss": 0.0298, "step": 26370 }, { "epoch": 19.019466474405192, "grad_norm": 0.12603281438350677, "learning_rate": 4.9573409962571995e-05, "loss": 0.0321, "step": 26380 }, { "epoch": 19.026676279740446, "grad_norm": 0.15921762585639954, "learning_rate": 4.954034186554121e-05, "loss": 0.0323, "step": 26390 }, { "epoch": 19.033886085075704, "grad_norm": 0.13872027397155762, "learning_rate": 4.950727396958051e-05, "loss": 0.0261, "step": 26400 }, { "epoch": 19.041095890410958, "grad_norm": 0.12230803817510605, "learning_rate": 4.947420628915489e-05, "loss": 0.0259, "step": 26410 }, { "epoch": 19.048305695746215, "grad_norm": 0.14883428812026978, "learning_rate": 4.944113883872932e-05, "loss": 0.0335, "step": 26420 }, { "epoch": 19.05551550108147, "grad_norm": 0.11653988808393478, "learning_rate": 4.9408071632768624e-05, "loss": 0.0315, "step": 26430 }, { "epoch": 19.062725306416727, "grad_norm": 0.19987399876117706, "learning_rate": 4.937500468573747e-05, "loss": 0.0316, "step": 26440 }, { "epoch": 19.06993511175198, "grad_norm": 0.1787411868572235, "learning_rate": 4.934193801210053e-05, "loss": 0.0265, "step": 26450 }, { "epoch": 19.07714491708724, "grad_norm": 0.09609664231538773, "learning_rate": 4.930887162632224e-05, "loss": 0.0298, "step": 26460 }, { "epoch": 19.084354722422496, "grad_norm": 0.14512470364570618, "learning_rate": 4.9275805542866994e-05, "loss": 0.0339, "step": 26470 }, { "epoch": 19.09156452775775, "grad_norm": 0.1519627422094345, "learning_rate": 4.9242739776199006e-05, "loss": 0.0279, "step": 26480 }, { "epoch": 19.098774333093008, "grad_norm": 0.15099336206912994, "learning_rate": 4.920967434078237e-05, "loss": 0.0268, "step": 26490 }, { "epoch": 19.105984138428262, "grad_norm": 0.10398747026920319, "learning_rate": 4.917660925108101e-05, "loss": 0.0254, "step": 26500 }, { "epoch": 19.11319394376352, "grad_norm": 0.1900533139705658, "learning_rate": 4.914354452155876e-05, "loss": 0.0327, "step": 26510 }, { "epoch": 19.120403749098774, "grad_norm": 0.14719411730766296, "learning_rate": 4.911048016667923e-05, "loss": 0.0259, "step": 26520 }, { "epoch": 19.12761355443403, "grad_norm": 0.15504559874534607, "learning_rate": 4.907741620090588e-05, "loss": 0.0301, "step": 26530 }, { "epoch": 19.134823359769285, "grad_norm": 0.13945040106773376, "learning_rate": 4.9044352638702046e-05, "loss": 0.0255, "step": 26540 }, { "epoch": 19.142033165104543, "grad_norm": 0.12937071919441223, "learning_rate": 4.9011289494530834e-05, "loss": 0.028, "step": 26550 }, { "epoch": 19.149242970439797, "grad_norm": 0.1907462775707245, "learning_rate": 4.897822678285516e-05, "loss": 0.0298, "step": 26560 }, { "epoch": 19.156452775775055, "grad_norm": 0.13881635665893555, "learning_rate": 4.8945164518137835e-05, "loss": 0.0271, "step": 26570 }, { "epoch": 19.16366258111031, "grad_norm": 0.12445378303527832, "learning_rate": 4.891210271484138e-05, "loss": 0.0276, "step": 26580 }, { "epoch": 19.170872386445566, "grad_norm": 0.17556844651699066, "learning_rate": 4.8879041387428164e-05, "loss": 0.0357, "step": 26590 }, { "epoch": 19.17808219178082, "grad_norm": 0.15679876506328583, "learning_rate": 4.8845980550360306e-05, "loss": 0.0277, "step": 26600 }, { "epoch": 19.185291997116078, "grad_norm": 0.1925029158592224, "learning_rate": 4.8812920218099786e-05, "loss": 0.0295, "step": 26610 }, { "epoch": 19.192501802451332, "grad_norm": 0.15504267811775208, "learning_rate": 4.8779860405108294e-05, "loss": 0.0307, "step": 26620 }, { "epoch": 19.19971160778659, "grad_norm": 0.14811743795871735, "learning_rate": 4.8746801125847285e-05, "loss": 0.0285, "step": 26630 }, { "epoch": 19.206921413121847, "grad_norm": 0.13559342920780182, "learning_rate": 4.8713742394778064e-05, "loss": 0.0305, "step": 26640 }, { "epoch": 19.2141312184571, "grad_norm": 0.13883045315742493, "learning_rate": 4.868068422636162e-05, "loss": 0.0314, "step": 26650 }, { "epoch": 19.22134102379236, "grad_norm": 0.13999228179454803, "learning_rate": 4.86476266350587e-05, "loss": 0.0377, "step": 26660 }, { "epoch": 19.228550829127613, "grad_norm": 0.18001826107501984, "learning_rate": 4.861456963532985e-05, "loss": 0.0313, "step": 26670 }, { "epoch": 19.23576063446287, "grad_norm": 0.2004229575395584, "learning_rate": 4.8581513241635316e-05, "loss": 0.027, "step": 26680 }, { "epoch": 19.242970439798125, "grad_norm": 0.13150981068611145, "learning_rate": 4.8548457468435065e-05, "loss": 0.0218, "step": 26690 }, { "epoch": 19.250180245133382, "grad_norm": 0.10624483227729797, "learning_rate": 4.851540233018884e-05, "loss": 0.0321, "step": 26700 }, { "epoch": 19.257390050468636, "grad_norm": 0.16706080734729767, "learning_rate": 4.848234784135608e-05, "loss": 0.0312, "step": 26710 }, { "epoch": 19.264599855803894, "grad_norm": 0.1585986465215683, "learning_rate": 4.8449294016395916e-05, "loss": 0.0331, "step": 26720 }, { "epoch": 19.271809661139148, "grad_norm": 0.15019695460796356, "learning_rate": 4.8416240869767246e-05, "loss": 0.0301, "step": 26730 }, { "epoch": 19.279019466474406, "grad_norm": 0.11277896910905838, "learning_rate": 4.838318841592864e-05, "loss": 0.0272, "step": 26740 }, { "epoch": 19.28622927180966, "grad_norm": 0.17758792638778687, "learning_rate": 4.835013666933834e-05, "loss": 0.0342, "step": 26750 }, { "epoch": 19.293439077144917, "grad_norm": 0.13586191833019257, "learning_rate": 4.83170856444543e-05, "loss": 0.0271, "step": 26760 }, { "epoch": 19.30064888248017, "grad_norm": 0.1426737904548645, "learning_rate": 4.828403535573419e-05, "loss": 0.0339, "step": 26770 }, { "epoch": 19.30785868781543, "grad_norm": 0.18626682460308075, "learning_rate": 4.825098581763532e-05, "loss": 0.0308, "step": 26780 }, { "epoch": 19.315068493150687, "grad_norm": 0.20288687944412231, "learning_rate": 4.821793704461465e-05, "loss": 0.0309, "step": 26790 }, { "epoch": 19.32227829848594, "grad_norm": 0.09992047399282455, "learning_rate": 4.8184889051128884e-05, "loss": 0.0314, "step": 26800 }, { "epoch": 19.3294881038212, "grad_norm": 0.1316489726305008, "learning_rate": 4.81518418516343e-05, "loss": 0.0311, "step": 26810 }, { "epoch": 19.336697909156452, "grad_norm": 0.10593656450510025, "learning_rate": 4.811879546058688e-05, "loss": 0.03, "step": 26820 }, { "epoch": 19.34390771449171, "grad_norm": 0.10996871441602707, "learning_rate": 4.808574989244224e-05, "loss": 0.0267, "step": 26830 }, { "epoch": 19.351117519826964, "grad_norm": 0.1499566286802292, "learning_rate": 4.8052705161655644e-05, "loss": 0.0298, "step": 26840 }, { "epoch": 19.35832732516222, "grad_norm": 0.12806817889213562, "learning_rate": 4.8019661282681936e-05, "loss": 0.0381, "step": 26850 }, { "epoch": 19.365537130497476, "grad_norm": 0.19294703006744385, "learning_rate": 4.79866182699757e-05, "loss": 0.0283, "step": 26860 }, { "epoch": 19.372746935832733, "grad_norm": 0.17560261487960815, "learning_rate": 4.795357613799103e-05, "loss": 0.0368, "step": 26870 }, { "epoch": 19.379956741167987, "grad_norm": 0.13274705410003662, "learning_rate": 4.792053490118166e-05, "loss": 0.0312, "step": 26880 }, { "epoch": 19.387166546503245, "grad_norm": 0.09942443668842316, "learning_rate": 4.788749457400099e-05, "loss": 0.036, "step": 26890 }, { "epoch": 19.3943763518385, "grad_norm": 0.157033309340477, "learning_rate": 4.7854455170901966e-05, "loss": 0.0283, "step": 26900 }, { "epoch": 19.401586157173757, "grad_norm": 0.11394993215799332, "learning_rate": 4.7821416706337136e-05, "loss": 0.0283, "step": 26910 }, { "epoch": 19.40879596250901, "grad_norm": 0.1882421374320984, "learning_rate": 4.7788379194758645e-05, "loss": 0.0342, "step": 26920 }, { "epoch": 19.41600576784427, "grad_norm": 0.17655080556869507, "learning_rate": 4.7755342650618246e-05, "loss": 0.0335, "step": 26930 }, { "epoch": 19.423215573179522, "grad_norm": 0.12585386633872986, "learning_rate": 4.7722307088367235e-05, "loss": 0.0244, "step": 26940 }, { "epoch": 19.43042537851478, "grad_norm": 0.1485484391450882, "learning_rate": 4.768927252245646e-05, "loss": 0.0313, "step": 26950 }, { "epoch": 19.437635183850038, "grad_norm": 0.1402527391910553, "learning_rate": 4.765623896733642e-05, "loss": 0.0361, "step": 26960 }, { "epoch": 19.44484498918529, "grad_norm": 0.164454847574234, "learning_rate": 4.762320643745708e-05, "loss": 0.0339, "step": 26970 }, { "epoch": 19.45205479452055, "grad_norm": 0.1697949469089508, "learning_rate": 4.7590174947267966e-05, "loss": 0.0271, "step": 26980 }, { "epoch": 19.459264599855803, "grad_norm": 0.1689959317445755, "learning_rate": 4.755714451121823e-05, "loss": 0.0322, "step": 26990 }, { "epoch": 19.46647440519106, "grad_norm": 0.18019606173038483, "learning_rate": 4.7524115143756476e-05, "loss": 0.0295, "step": 27000 }, { "epoch": 19.473684210526315, "grad_norm": 0.16433027386665344, "learning_rate": 4.7491086859330854e-05, "loss": 0.0274, "step": 27010 }, { "epoch": 19.480894015861573, "grad_norm": 0.14453132450580597, "learning_rate": 4.74580596723891e-05, "loss": 0.031, "step": 27020 }, { "epoch": 19.488103821196827, "grad_norm": 0.1797456592321396, "learning_rate": 4.742503359737841e-05, "loss": 0.034, "step": 27030 }, { "epoch": 19.495313626532084, "grad_norm": 0.15972216427326202, "learning_rate": 4.739200864874547e-05, "loss": 0.0277, "step": 27040 }, { "epoch": 19.50252343186734, "grad_norm": 0.11556222289800644, "learning_rate": 4.735898484093658e-05, "loss": 0.0243, "step": 27050 }, { "epoch": 19.509733237202596, "grad_norm": 0.14149613678455353, "learning_rate": 4.7325962188397444e-05, "loss": 0.0332, "step": 27060 }, { "epoch": 19.51694304253785, "grad_norm": 0.11556242406368256, "learning_rate": 4.729294070557329e-05, "loss": 0.0294, "step": 27070 }, { "epoch": 19.524152847873108, "grad_norm": 0.18093419075012207, "learning_rate": 4.7259920406908833e-05, "loss": 0.0248, "step": 27080 }, { "epoch": 19.53136265320836, "grad_norm": 0.12106654047966003, "learning_rate": 4.7226901306848306e-05, "loss": 0.0317, "step": 27090 }, { "epoch": 19.53857245854362, "grad_norm": 0.11043702811002731, "learning_rate": 4.719388341983535e-05, "loss": 0.0292, "step": 27100 }, { "epoch": 19.545782263878877, "grad_norm": 0.12683264911174774, "learning_rate": 4.71608667603131e-05, "loss": 0.0361, "step": 27110 }, { "epoch": 19.55299206921413, "grad_norm": 0.16543911397457123, "learning_rate": 4.712785134272422e-05, "loss": 0.031, "step": 27120 }, { "epoch": 19.56020187454939, "grad_norm": 0.1380304992198944, "learning_rate": 4.7094837181510734e-05, "loss": 0.0303, "step": 27130 }, { "epoch": 19.567411679884643, "grad_norm": 0.17781543731689453, "learning_rate": 4.706182429111414e-05, "loss": 0.0313, "step": 27140 }, { "epoch": 19.5746214852199, "grad_norm": 0.12918637692928314, "learning_rate": 4.702881268597545e-05, "loss": 0.0281, "step": 27150 }, { "epoch": 19.581831290555154, "grad_norm": 0.15434059500694275, "learning_rate": 4.699580238053503e-05, "loss": 0.0296, "step": 27160 }, { "epoch": 19.589041095890412, "grad_norm": 0.16182658076286316, "learning_rate": 4.69627933892327e-05, "loss": 0.0302, "step": 27170 }, { "epoch": 19.596250901225666, "grad_norm": 0.21728408336639404, "learning_rate": 4.692978572650773e-05, "loss": 0.0312, "step": 27180 }, { "epoch": 19.603460706560924, "grad_norm": 0.16980084776878357, "learning_rate": 4.689677940679881e-05, "loss": 0.0365, "step": 27190 }, { "epoch": 19.610670511896178, "grad_norm": 0.14351584017276764, "learning_rate": 4.6863774444543984e-05, "loss": 0.0322, "step": 27200 }, { "epoch": 19.617880317231435, "grad_norm": 0.132148876786232, "learning_rate": 4.6830770854180784e-05, "loss": 0.0293, "step": 27210 }, { "epoch": 19.62509012256669, "grad_norm": 0.17508356273174286, "learning_rate": 4.679776865014608e-05, "loss": 0.0258, "step": 27220 }, { "epoch": 19.632299927901947, "grad_norm": 0.2532515227794647, "learning_rate": 4.676476784687615e-05, "loss": 0.0323, "step": 27230 }, { "epoch": 19.6395097332372, "grad_norm": 0.15062569081783295, "learning_rate": 4.673176845880669e-05, "loss": 0.0269, "step": 27240 }, { "epoch": 19.64671953857246, "grad_norm": 0.16011881828308105, "learning_rate": 4.669877050037275e-05, "loss": 0.0291, "step": 27250 }, { "epoch": 19.653929343907713, "grad_norm": 0.1563696712255478, "learning_rate": 4.666577398600874e-05, "loss": 0.0289, "step": 27260 }, { "epoch": 19.66113914924297, "grad_norm": 0.13866516947746277, "learning_rate": 4.663277893014846e-05, "loss": 0.0318, "step": 27270 }, { "epoch": 19.668348954578228, "grad_norm": 0.19643080234527588, "learning_rate": 4.659978534722508e-05, "loss": 0.028, "step": 27280 }, { "epoch": 19.675558759913482, "grad_norm": 0.1437200903892517, "learning_rate": 4.6566793251671104e-05, "loss": 0.0303, "step": 27290 }, { "epoch": 19.68276856524874, "grad_norm": 0.16218248009681702, "learning_rate": 4.65338026579184e-05, "loss": 0.032, "step": 27300 }, { "epoch": 19.689978370583994, "grad_norm": 0.11790565401315689, "learning_rate": 4.650081358039817e-05, "loss": 0.0316, "step": 27310 }, { "epoch": 19.69718817591925, "grad_norm": 0.12467105686664581, "learning_rate": 4.646782603354098e-05, "loss": 0.029, "step": 27320 }, { "epoch": 19.704397981254505, "grad_norm": 0.14835280179977417, "learning_rate": 4.643484003177666e-05, "loss": 0.0324, "step": 27330 }, { "epoch": 19.711607786589763, "grad_norm": 0.1244325339794159, "learning_rate": 4.6401855589534454e-05, "loss": 0.0299, "step": 27340 }, { "epoch": 19.718817591925017, "grad_norm": 0.14103029668331146, "learning_rate": 4.636887272124286e-05, "loss": 0.0293, "step": 27350 }, { "epoch": 19.726027397260275, "grad_norm": 0.16673341393470764, "learning_rate": 4.6335891441329685e-05, "loss": 0.0323, "step": 27360 }, { "epoch": 19.73323720259553, "grad_norm": 0.13840073347091675, "learning_rate": 4.630291176422209e-05, "loss": 0.0349, "step": 27370 }, { "epoch": 19.740447007930786, "grad_norm": 0.16784963011741638, "learning_rate": 4.62699337043465e-05, "loss": 0.0371, "step": 27380 }, { "epoch": 19.74765681326604, "grad_norm": 0.14519423246383667, "learning_rate": 4.623695727612861e-05, "loss": 0.0252, "step": 27390 }, { "epoch": 19.754866618601298, "grad_norm": 0.16613849997520447, "learning_rate": 4.620398249399347e-05, "loss": 0.0297, "step": 27400 }, { "epoch": 19.762076423936552, "grad_norm": 0.18687529861927032, "learning_rate": 4.617100937236535e-05, "loss": 0.0312, "step": 27410 }, { "epoch": 19.76928622927181, "grad_norm": 0.13089622557163239, "learning_rate": 4.613803792566781e-05, "loss": 0.0285, "step": 27420 }, { "epoch": 19.776496034607067, "grad_norm": 0.14121368527412415, "learning_rate": 4.61050681683237e-05, "loss": 0.0317, "step": 27430 }, { "epoch": 19.78370583994232, "grad_norm": 0.14569559693336487, "learning_rate": 4.6072100114755104e-05, "loss": 0.0309, "step": 27440 }, { "epoch": 19.79091564527758, "grad_norm": 0.12374977022409439, "learning_rate": 4.603913377938336e-05, "loss": 0.027, "step": 27450 }, { "epoch": 19.798125450612833, "grad_norm": 0.1325894147157669, "learning_rate": 4.600616917662905e-05, "loss": 0.0266, "step": 27460 }, { "epoch": 19.80533525594809, "grad_norm": 0.13755381107330322, "learning_rate": 4.597320632091206e-05, "loss": 0.0368, "step": 27470 }, { "epoch": 19.812545061283345, "grad_norm": 0.11641599237918854, "learning_rate": 4.594024522665142e-05, "loss": 0.0257, "step": 27480 }, { "epoch": 19.819754866618602, "grad_norm": 0.20657680928707123, "learning_rate": 4.590728590826543e-05, "loss": 0.0293, "step": 27490 }, { "epoch": 19.826964671953856, "grad_norm": 0.11652985215187073, "learning_rate": 4.587432838017165e-05, "loss": 0.0293, "step": 27500 }, { "epoch": 19.834174477289114, "grad_norm": 0.11480807512998581, "learning_rate": 4.58413726567868e-05, "loss": 0.0344, "step": 27510 }, { "epoch": 19.841384282624368, "grad_norm": 0.18932634592056274, "learning_rate": 4.5808418752526814e-05, "loss": 0.03, "step": 27520 }, { "epoch": 19.848594087959626, "grad_norm": 0.17893359065055847, "learning_rate": 4.5775466681806895e-05, "loss": 0.033, "step": 27530 }, { "epoch": 19.85580389329488, "grad_norm": 0.19782039523124695, "learning_rate": 4.574251645904136e-05, "loss": 0.0269, "step": 27540 }, { "epoch": 19.863013698630137, "grad_norm": 0.13438117504119873, "learning_rate": 4.5709568098643776e-05, "loss": 0.0259, "step": 27550 }, { "epoch": 19.87022350396539, "grad_norm": 0.13413673639297485, "learning_rate": 4.567662161502687e-05, "loss": 0.026, "step": 27560 }, { "epoch": 19.87743330930065, "grad_norm": 0.1287628561258316, "learning_rate": 4.5643677022602554e-05, "loss": 0.0302, "step": 27570 }, { "epoch": 19.884643114635907, "grad_norm": 0.10443124920129776, "learning_rate": 4.56107343357819e-05, "loss": 0.0269, "step": 27580 }, { "epoch": 19.89185291997116, "grad_norm": 0.08283382654190063, "learning_rate": 4.557779356897518e-05, "loss": 0.025, "step": 27590 }, { "epoch": 19.89906272530642, "grad_norm": 0.1606883555650711, "learning_rate": 4.55448547365918e-05, "loss": 0.0302, "step": 27600 }, { "epoch": 19.906272530641672, "grad_norm": 0.10934331268072128, "learning_rate": 4.5511917853040315e-05, "loss": 0.0276, "step": 27610 }, { "epoch": 19.91348233597693, "grad_norm": 0.18943819403648376, "learning_rate": 4.547898293272842e-05, "loss": 0.0327, "step": 27620 }, { "epoch": 19.920692141312184, "grad_norm": 0.10040097683668137, "learning_rate": 4.544604999006301e-05, "loss": 0.0288, "step": 27630 }, { "epoch": 19.92790194664744, "grad_norm": 0.16336068511009216, "learning_rate": 4.541311903945004e-05, "loss": 0.027, "step": 27640 }, { "epoch": 19.935111751982696, "grad_norm": 0.14747264981269836, "learning_rate": 4.538019009529463e-05, "loss": 0.0332, "step": 27650 }, { "epoch": 19.942321557317953, "grad_norm": 0.13377253711223602, "learning_rate": 4.5347263172001035e-05, "loss": 0.0301, "step": 27660 }, { "epoch": 19.949531362653207, "grad_norm": 0.15810586512088776, "learning_rate": 4.531433828397261e-05, "loss": 0.0313, "step": 27670 }, { "epoch": 19.956741167988465, "grad_norm": 0.1355353146791458, "learning_rate": 4.528141544561179e-05, "loss": 0.0362, "step": 27680 }, { "epoch": 19.96395097332372, "grad_norm": 0.1257682591676712, "learning_rate": 4.5248494671320186e-05, "loss": 0.033, "step": 27690 }, { "epoch": 19.971160778658977, "grad_norm": 0.17531539499759674, "learning_rate": 4.521557597549844e-05, "loss": 0.0258, "step": 27700 }, { "epoch": 19.97837058399423, "grad_norm": 0.14820696413516998, "learning_rate": 4.5182659372546284e-05, "loss": 0.03, "step": 27710 }, { "epoch": 19.98558038932949, "grad_norm": 0.12612350285053253, "learning_rate": 4.514974487686261e-05, "loss": 0.0294, "step": 27720 }, { "epoch": 19.992790194664742, "grad_norm": 0.16213051974773407, "learning_rate": 4.5116832502845306e-05, "loss": 0.0259, "step": 27730 }, { "epoch": 20.0, "grad_norm": 0.2958701550960541, "learning_rate": 4.508392226489136e-05, "loss": 0.0291, "step": 27740 }, { "epoch": 20.007209805335258, "grad_norm": 0.09412521123886108, "learning_rate": 4.505101417739683e-05, "loss": 0.0358, "step": 27750 }, { "epoch": 20.01441961067051, "grad_norm": 0.15663085877895355, "learning_rate": 4.501810825475685e-05, "loss": 0.029, "step": 27760 }, { "epoch": 20.02162941600577, "grad_norm": 0.1499638557434082, "learning_rate": 4.4985204511365556e-05, "loss": 0.0309, "step": 27770 }, { "epoch": 20.028839221341023, "grad_norm": 0.2111012190580368, "learning_rate": 4.49523029616162e-05, "loss": 0.0252, "step": 27780 }, { "epoch": 20.03604902667628, "grad_norm": 0.13724400103092194, "learning_rate": 4.491940361990101e-05, "loss": 0.0359, "step": 27790 }, { "epoch": 20.043258832011535, "grad_norm": 0.17071960866451263, "learning_rate": 4.48865065006113e-05, "loss": 0.0282, "step": 27800 }, { "epoch": 20.050468637346793, "grad_norm": 0.17261549830436707, "learning_rate": 4.485361161813735e-05, "loss": 0.0319, "step": 27810 }, { "epoch": 20.057678442682047, "grad_norm": 0.1495809108018875, "learning_rate": 4.482071898686854e-05, "loss": 0.0326, "step": 27820 }, { "epoch": 20.064888248017304, "grad_norm": 0.1193099394440651, "learning_rate": 4.478782862119321e-05, "loss": 0.0291, "step": 27830 }, { "epoch": 20.07209805335256, "grad_norm": 0.1315683126449585, "learning_rate": 4.475494053549869e-05, "loss": 0.0296, "step": 27840 }, { "epoch": 20.079307858687816, "grad_norm": 0.15388274192810059, "learning_rate": 4.4722054744171405e-05, "loss": 0.032, "step": 27850 }, { "epoch": 20.08651766402307, "grad_norm": 0.10046131163835526, "learning_rate": 4.468917126159669e-05, "loss": 0.0276, "step": 27860 }, { "epoch": 20.093727469358328, "grad_norm": 0.10227905213832855, "learning_rate": 4.465629010215887e-05, "loss": 0.0254, "step": 27870 }, { "epoch": 20.10093727469358, "grad_norm": 0.1036173403263092, "learning_rate": 4.462341128024133e-05, "loss": 0.0281, "step": 27880 }, { "epoch": 20.10814708002884, "grad_norm": 0.11492575705051422, "learning_rate": 4.4590534810226356e-05, "loss": 0.0257, "step": 27890 }, { "epoch": 20.115356885364093, "grad_norm": 0.12208499014377594, "learning_rate": 4.455766070649523e-05, "loss": 0.0287, "step": 27900 }, { "epoch": 20.12256669069935, "grad_norm": 0.14206789433956146, "learning_rate": 4.4524788983428215e-05, "loss": 0.0293, "step": 27910 }, { "epoch": 20.12977649603461, "grad_norm": 0.12889571487903595, "learning_rate": 4.4491919655404515e-05, "loss": 0.0257, "step": 27920 }, { "epoch": 20.136986301369863, "grad_norm": 0.16890455782413483, "learning_rate": 4.445905273680228e-05, "loss": 0.0321, "step": 27930 }, { "epoch": 20.14419610670512, "grad_norm": 0.14256151020526886, "learning_rate": 4.442618824199865e-05, "loss": 0.0344, "step": 27940 }, { "epoch": 20.151405912040374, "grad_norm": 0.13362477719783783, "learning_rate": 4.4393326185369635e-05, "loss": 0.0306, "step": 27950 }, { "epoch": 20.158615717375632, "grad_norm": 0.18319836258888245, "learning_rate": 4.436046658129024e-05, "loss": 0.0313, "step": 27960 }, { "epoch": 20.165825522710886, "grad_norm": 0.13715989887714386, "learning_rate": 4.432760944413433e-05, "loss": 0.023, "step": 27970 }, { "epoch": 20.173035328046144, "grad_norm": 0.12190305441617966, "learning_rate": 4.4294754788274796e-05, "loss": 0.0282, "step": 27980 }, { "epoch": 20.180245133381398, "grad_norm": 0.12867453694343567, "learning_rate": 4.426190262808334e-05, "loss": 0.0283, "step": 27990 }, { "epoch": 20.187454938716655, "grad_norm": 0.13728801906108856, "learning_rate": 4.42290529779306e-05, "loss": 0.0304, "step": 28000 }, { "epoch": 20.19466474405191, "grad_norm": 0.160627543926239, "learning_rate": 4.4196205852186176e-05, "loss": 0.0259, "step": 28010 }, { "epoch": 20.201874549387167, "grad_norm": 0.17151819169521332, "learning_rate": 4.416336126521848e-05, "loss": 0.0275, "step": 28020 }, { "epoch": 20.20908435472242, "grad_norm": 0.15239374339580536, "learning_rate": 4.4130519231394865e-05, "loss": 0.0292, "step": 28030 }, { "epoch": 20.21629416005768, "grad_norm": 0.18419063091278076, "learning_rate": 4.409767976508154e-05, "loss": 0.0331, "step": 28040 }, { "epoch": 20.223503965392933, "grad_norm": 0.18107561767101288, "learning_rate": 4.406484288064363e-05, "loss": 0.0274, "step": 28050 }, { "epoch": 20.23071377072819, "grad_norm": 0.16234298050403595, "learning_rate": 4.4032008592445055e-05, "loss": 0.0261, "step": 28060 }, { "epoch": 20.237923576063448, "grad_norm": 0.17445647716522217, "learning_rate": 4.3999176914848714e-05, "loss": 0.0281, "step": 28070 }, { "epoch": 20.245133381398702, "grad_norm": 0.10731074959039688, "learning_rate": 4.396634786221625e-05, "loss": 0.0208, "step": 28080 }, { "epoch": 20.25234318673396, "grad_norm": 0.11469104140996933, "learning_rate": 4.3933521448908203e-05, "loss": 0.0295, "step": 28090 }, { "epoch": 20.259552992069214, "grad_norm": 0.13855841755867004, "learning_rate": 4.3900697689284e-05, "loss": 0.0267, "step": 28100 }, { "epoch": 20.26676279740447, "grad_norm": 0.13570253551006317, "learning_rate": 4.386787659770185e-05, "loss": 0.0307, "step": 28110 }, { "epoch": 20.273972602739725, "grad_norm": 0.13157905638217926, "learning_rate": 4.383505818851878e-05, "loss": 0.0296, "step": 28120 }, { "epoch": 20.281182408074983, "grad_norm": 0.1463562399148941, "learning_rate": 4.380224247609073e-05, "loss": 0.0304, "step": 28130 }, { "epoch": 20.288392213410237, "grad_norm": 0.15101192891597748, "learning_rate": 4.3769429474772376e-05, "loss": 0.027, "step": 28140 }, { "epoch": 20.295602018745495, "grad_norm": 0.13720722496509552, "learning_rate": 4.373661919891726e-05, "loss": 0.0251, "step": 28150 }, { "epoch": 20.30281182408075, "grad_norm": 0.21278606355190277, "learning_rate": 4.3703811662877674e-05, "loss": 0.029, "step": 28160 }, { "epoch": 20.310021629416006, "grad_norm": 0.1581203192472458, "learning_rate": 4.36710068810048e-05, "loss": 0.0245, "step": 28170 }, { "epoch": 20.31723143475126, "grad_norm": 0.16793791949748993, "learning_rate": 4.363820486764853e-05, "loss": 0.029, "step": 28180 }, { "epoch": 20.324441240086518, "grad_norm": 0.13719911873340607, "learning_rate": 4.360540563715758e-05, "loss": 0.0223, "step": 28190 }, { "epoch": 20.331651045421772, "grad_norm": 0.15701481699943542, "learning_rate": 4.357260920387946e-05, "loss": 0.0274, "step": 28200 }, { "epoch": 20.33886085075703, "grad_norm": 0.14609915018081665, "learning_rate": 4.353981558216045e-05, "loss": 0.0281, "step": 28210 }, { "epoch": 20.346070656092287, "grad_norm": 0.12661099433898926, "learning_rate": 4.3507024786345565e-05, "loss": 0.0298, "step": 28220 }, { "epoch": 20.35328046142754, "grad_norm": 0.16113053262233734, "learning_rate": 4.3474236830778646e-05, "loss": 0.0267, "step": 28230 }, { "epoch": 20.3604902667628, "grad_norm": 0.15579965710639954, "learning_rate": 4.344145172980223e-05, "loss": 0.0347, "step": 28240 }, { "epoch": 20.367700072098053, "grad_norm": 0.17822404205799103, "learning_rate": 4.3408669497757656e-05, "loss": 0.0307, "step": 28250 }, { "epoch": 20.37490987743331, "grad_norm": 0.16734011471271515, "learning_rate": 4.3375890148984976e-05, "loss": 0.0304, "step": 28260 }, { "epoch": 20.382119682768565, "grad_norm": 0.13273821771144867, "learning_rate": 4.334311369782301e-05, "loss": 0.0311, "step": 28270 }, { "epoch": 20.389329488103822, "grad_norm": 0.06445420533418655, "learning_rate": 4.3310340158609244e-05, "loss": 0.0276, "step": 28280 }, { "epoch": 20.396539293439076, "grad_norm": 0.13759860396385193, "learning_rate": 4.327756954568e-05, "loss": 0.0241, "step": 28290 }, { "epoch": 20.403749098774334, "grad_norm": 0.10568350553512573, "learning_rate": 4.324480187337021e-05, "loss": 0.0255, "step": 28300 }, { "epoch": 20.410958904109588, "grad_norm": 0.1619812399148941, "learning_rate": 4.3212037156013596e-05, "loss": 0.0219, "step": 28310 }, { "epoch": 20.418168709444846, "grad_norm": 0.12719081342220306, "learning_rate": 4.317927540794251e-05, "loss": 0.0272, "step": 28320 }, { "epoch": 20.4253785147801, "grad_norm": 0.1154041588306427, "learning_rate": 4.314651664348812e-05, "loss": 0.0299, "step": 28330 }, { "epoch": 20.432588320115357, "grad_norm": 0.11548127979040146, "learning_rate": 4.3113760876980195e-05, "loss": 0.0366, "step": 28340 }, { "epoch": 20.43979812545061, "grad_norm": 0.16948197782039642, "learning_rate": 4.308100812274719e-05, "loss": 0.0256, "step": 28350 }, { "epoch": 20.44700793078587, "grad_norm": 0.12159505486488342, "learning_rate": 4.304825839511632e-05, "loss": 0.0298, "step": 28360 }, { "epoch": 20.454217736121123, "grad_norm": 0.12059688568115234, "learning_rate": 4.3015511708413395e-05, "loss": 0.0269, "step": 28370 }, { "epoch": 20.46142754145638, "grad_norm": 0.16024014353752136, "learning_rate": 4.298276807696294e-05, "loss": 0.0293, "step": 28380 }, { "epoch": 20.46863734679164, "grad_norm": 0.15360333025455475, "learning_rate": 4.2950027515088136e-05, "loss": 0.029, "step": 28390 }, { "epoch": 20.475847152126892, "grad_norm": 0.24305395781993866, "learning_rate": 4.291729003711083e-05, "loss": 0.0282, "step": 28400 }, { "epoch": 20.48305695746215, "grad_norm": 0.1508532166481018, "learning_rate": 4.2884555657351455e-05, "loss": 0.0281, "step": 28410 }, { "epoch": 20.490266762797404, "grad_norm": 0.20372265577316284, "learning_rate": 4.285182439012921e-05, "loss": 0.0295, "step": 28420 }, { "epoch": 20.49747656813266, "grad_norm": 0.1496293693780899, "learning_rate": 4.281909624976183e-05, "loss": 0.0317, "step": 28430 }, { "epoch": 20.504686373467916, "grad_norm": 0.19814687967300415, "learning_rate": 4.278637125056568e-05, "loss": 0.0298, "step": 28440 }, { "epoch": 20.511896178803173, "grad_norm": 0.1712312400341034, "learning_rate": 4.2753649406855855e-05, "loss": 0.0305, "step": 28450 }, { "epoch": 20.519105984138427, "grad_norm": 0.11079125851392746, "learning_rate": 4.2720930732945966e-05, "loss": 0.0291, "step": 28460 }, { "epoch": 20.526315789473685, "grad_norm": 0.1646420657634735, "learning_rate": 4.268821524314826e-05, "loss": 0.0292, "step": 28470 }, { "epoch": 20.53352559480894, "grad_norm": 0.19258004426956177, "learning_rate": 4.2655502951773623e-05, "loss": 0.0344, "step": 28480 }, { "epoch": 20.540735400144197, "grad_norm": 0.12864691019058228, "learning_rate": 4.262279387313152e-05, "loss": 0.034, "step": 28490 }, { "epoch": 20.54794520547945, "grad_norm": 0.15765826404094696, "learning_rate": 4.259008802153001e-05, "loss": 0.0295, "step": 28500 }, { "epoch": 20.55515501081471, "grad_norm": 0.11059165000915527, "learning_rate": 4.2557385411275735e-05, "loss": 0.0297, "step": 28510 }, { "epoch": 20.562364816149962, "grad_norm": 0.14131613075733185, "learning_rate": 4.252468605667395e-05, "loss": 0.027, "step": 28520 }, { "epoch": 20.56957462148522, "grad_norm": 0.15028928220272064, "learning_rate": 4.2491989972028456e-05, "loss": 0.0314, "step": 28530 }, { "epoch": 20.576784426820474, "grad_norm": 0.19200453162193298, "learning_rate": 4.245929717164161e-05, "loss": 0.0272, "step": 28540 }, { "epoch": 20.58399423215573, "grad_norm": 0.20985189080238342, "learning_rate": 4.242660766981438e-05, "loss": 0.0291, "step": 28550 }, { "epoch": 20.59120403749099, "grad_norm": 0.1777307093143463, "learning_rate": 4.2393921480846264e-05, "loss": 0.0315, "step": 28560 }, { "epoch": 20.598413842826243, "grad_norm": 0.1375213861465454, "learning_rate": 4.236123861903528e-05, "loss": 0.0274, "step": 28570 }, { "epoch": 20.6056236481615, "grad_norm": 0.16307076811790466, "learning_rate": 4.232855909867806e-05, "loss": 0.0286, "step": 28580 }, { "epoch": 20.612833453496755, "grad_norm": 0.13585689663887024, "learning_rate": 4.229588293406972e-05, "loss": 0.0263, "step": 28590 }, { "epoch": 20.620043258832013, "grad_norm": 0.12004224210977554, "learning_rate": 4.226321013950392e-05, "loss": 0.0285, "step": 28600 }, { "epoch": 20.627253064167267, "grad_norm": 0.12810739874839783, "learning_rate": 4.223054072927286e-05, "loss": 0.0289, "step": 28610 }, { "epoch": 20.634462869502524, "grad_norm": 0.12137356400489807, "learning_rate": 4.219787471766724e-05, "loss": 0.0287, "step": 28620 }, { "epoch": 20.64167267483778, "grad_norm": 0.16377483308315277, "learning_rate": 4.216521211897629e-05, "loss": 0.0334, "step": 28630 }, { "epoch": 20.648882480173036, "grad_norm": 0.1614648848772049, "learning_rate": 4.2132552947487734e-05, "loss": 0.0302, "step": 28640 }, { "epoch": 20.65609228550829, "grad_norm": 0.13617131114006042, "learning_rate": 4.209989721748783e-05, "loss": 0.0276, "step": 28650 }, { "epoch": 20.663302090843548, "grad_norm": 0.19154755771160126, "learning_rate": 4.206724494326127e-05, "loss": 0.026, "step": 28660 }, { "epoch": 20.6705118961788, "grad_norm": 0.12667718529701233, "learning_rate": 4.203459613909126e-05, "loss": 0.0236, "step": 28670 }, { "epoch": 20.67772170151406, "grad_norm": 0.1350371390581131, "learning_rate": 4.200195081925955e-05, "loss": 0.0325, "step": 28680 }, { "epoch": 20.684931506849313, "grad_norm": 0.1343497633934021, "learning_rate": 4.196930899804627e-05, "loss": 0.0339, "step": 28690 }, { "epoch": 20.69214131218457, "grad_norm": 0.1926865428686142, "learning_rate": 4.1936670689730044e-05, "loss": 0.0275, "step": 28700 }, { "epoch": 20.69935111751983, "grad_norm": 0.11909488588571548, "learning_rate": 4.190403590858804e-05, "loss": 0.0252, "step": 28710 }, { "epoch": 20.706560922855083, "grad_norm": 0.14650647342205048, "learning_rate": 4.187140466889575e-05, "loss": 0.028, "step": 28720 }, { "epoch": 20.71377072819034, "grad_norm": 0.14754296839237213, "learning_rate": 4.183877698492724e-05, "loss": 0.0293, "step": 28730 }, { "epoch": 20.720980533525594, "grad_norm": 0.1820576936006546, "learning_rate": 4.1806152870954935e-05, "loss": 0.0279, "step": 28740 }, { "epoch": 20.728190338860852, "grad_norm": 0.10342000424861908, "learning_rate": 4.177353234124976e-05, "loss": 0.0269, "step": 28750 }, { "epoch": 20.735400144196106, "grad_norm": 0.14736582338809967, "learning_rate": 4.1740915410081e-05, "loss": 0.0266, "step": 28760 }, { "epoch": 20.742609949531364, "grad_norm": 0.1350865215063095, "learning_rate": 4.170830209171646e-05, "loss": 0.0277, "step": 28770 }, { "epoch": 20.749819754866618, "grad_norm": 0.12090031802654266, "learning_rate": 4.1675692400422286e-05, "loss": 0.0313, "step": 28780 }, { "epoch": 20.757029560201875, "grad_norm": 0.17491473257541656, "learning_rate": 4.164308635046304e-05, "loss": 0.03, "step": 28790 }, { "epoch": 20.76423936553713, "grad_norm": 0.14618167281150818, "learning_rate": 4.161048395610177e-05, "loss": 0.0312, "step": 28800 }, { "epoch": 20.771449170872387, "grad_norm": 0.312477707862854, "learning_rate": 4.157788523159985e-05, "loss": 0.0289, "step": 28810 }, { "epoch": 20.77865897620764, "grad_norm": 0.16520513594150543, "learning_rate": 4.154529019121706e-05, "loss": 0.0268, "step": 28820 }, { "epoch": 20.7858687815429, "grad_norm": 0.1325131207704544, "learning_rate": 4.1512698849211564e-05, "loss": 0.028, "step": 28830 }, { "epoch": 20.793078586878153, "grad_norm": 0.19502666592597961, "learning_rate": 4.1480111219839954e-05, "loss": 0.027, "step": 28840 }, { "epoch": 20.80028839221341, "grad_norm": 0.13791444897651672, "learning_rate": 4.144752731735714e-05, "loss": 0.0286, "step": 28850 }, { "epoch": 20.807498197548668, "grad_norm": 0.10226137936115265, "learning_rate": 4.1414947156016446e-05, "loss": 0.027, "step": 28860 }, { "epoch": 20.814708002883922, "grad_norm": 0.15498068928718567, "learning_rate": 4.138237075006953e-05, "loss": 0.0322, "step": 28870 }, { "epoch": 20.82191780821918, "grad_norm": 0.20166577398777008, "learning_rate": 4.134979811376645e-05, "loss": 0.0325, "step": 28880 }, { "epoch": 20.829127613554434, "grad_norm": 0.1251753568649292, "learning_rate": 4.131722926135552e-05, "loss": 0.0299, "step": 28890 }, { "epoch": 20.83633741888969, "grad_norm": 0.11793962121009827, "learning_rate": 4.1284664207083526e-05, "loss": 0.0253, "step": 28900 }, { "epoch": 20.843547224224945, "grad_norm": 0.1540427953004837, "learning_rate": 4.125210296519551e-05, "loss": 0.0326, "step": 28910 }, { "epoch": 20.850757029560203, "grad_norm": 0.13289843499660492, "learning_rate": 4.121954554993484e-05, "loss": 0.0284, "step": 28920 }, { "epoch": 20.857966834895457, "grad_norm": 0.14918288588523865, "learning_rate": 4.118699197554327e-05, "loss": 0.0273, "step": 28930 }, { "epoch": 20.865176640230715, "grad_norm": 0.12550203502178192, "learning_rate": 4.1154442256260814e-05, "loss": 0.0324, "step": 28940 }, { "epoch": 20.87238644556597, "grad_norm": 0.1390131562948227, "learning_rate": 4.1121896406325824e-05, "loss": 0.0268, "step": 28950 }, { "epoch": 20.879596250901226, "grad_norm": 0.1440308392047882, "learning_rate": 4.108935443997499e-05, "loss": 0.0271, "step": 28960 }, { "epoch": 20.88680605623648, "grad_norm": 0.14118917286396027, "learning_rate": 4.105681637144325e-05, "loss": 0.0259, "step": 28970 }, { "epoch": 20.894015861571738, "grad_norm": 0.10381423681974411, "learning_rate": 4.102428221496387e-05, "loss": 0.0273, "step": 28980 }, { "epoch": 20.901225666906992, "grad_norm": 0.13206204771995544, "learning_rate": 4.099175198476838e-05, "loss": 0.0305, "step": 28990 }, { "epoch": 20.90843547224225, "grad_norm": 0.14805376529693604, "learning_rate": 4.0959225695086635e-05, "loss": 0.032, "step": 29000 }, { "epoch": 20.915645277577504, "grad_norm": 0.13286042213439941, "learning_rate": 4.0926703360146724e-05, "loss": 0.0285, "step": 29010 }, { "epoch": 20.92285508291276, "grad_norm": 0.13596384227275848, "learning_rate": 4.0894184994175e-05, "loss": 0.0311, "step": 29020 }, { "epoch": 20.93006488824802, "grad_norm": 0.18493705987930298, "learning_rate": 4.086167061139614e-05, "loss": 0.0309, "step": 29030 }, { "epoch": 20.937274693583273, "grad_norm": 0.13355670869350433, "learning_rate": 4.082916022603303e-05, "loss": 0.0244, "step": 29040 }, { "epoch": 20.94448449891853, "grad_norm": 0.1590198576450348, "learning_rate": 4.079665385230678e-05, "loss": 0.0289, "step": 29050 }, { "epoch": 20.951694304253785, "grad_norm": 0.14535360038280487, "learning_rate": 4.076415150443683e-05, "loss": 0.0251, "step": 29060 }, { "epoch": 20.958904109589042, "grad_norm": 0.14764092862606049, "learning_rate": 4.073165319664079e-05, "loss": 0.028, "step": 29070 }, { "epoch": 20.966113914924296, "grad_norm": 0.10327587276697159, "learning_rate": 4.069915894313451e-05, "loss": 0.0247, "step": 29080 }, { "epoch": 20.973323720259554, "grad_norm": 0.1515127569437027, "learning_rate": 4.066666875813212e-05, "loss": 0.0294, "step": 29090 }, { "epoch": 20.980533525594808, "grad_norm": 0.21042564511299133, "learning_rate": 4.063418265584589e-05, "loss": 0.039, "step": 29100 }, { "epoch": 20.987743330930066, "grad_norm": 0.127043679356575, "learning_rate": 4.060170065048636e-05, "loss": 0.029, "step": 29110 }, { "epoch": 20.99495313626532, "grad_norm": 0.13146589696407318, "learning_rate": 4.056922275626227e-05, "loss": 0.0293, "step": 29120 }, { "epoch": 21.002162941600577, "grad_norm": 0.2128334492444992, "learning_rate": 4.053674898738056e-05, "loss": 0.0298, "step": 29130 }, { "epoch": 21.00937274693583, "grad_norm": 0.14927396178245544, "learning_rate": 4.050427935804633e-05, "loss": 0.0268, "step": 29140 }, { "epoch": 21.01658255227109, "grad_norm": 0.12367544323205948, "learning_rate": 4.047181388246293e-05, "loss": 0.0266, "step": 29150 }, { "epoch": 21.023792357606343, "grad_norm": 0.15317842364311218, "learning_rate": 4.043935257483185e-05, "loss": 0.0257, "step": 29160 }, { "epoch": 21.0310021629416, "grad_norm": 0.15207915008068085, "learning_rate": 4.0406895449352775e-05, "loss": 0.0302, "step": 29170 }, { "epoch": 21.03821196827686, "grad_norm": 0.09081102162599564, "learning_rate": 4.0374442520223525e-05, "loss": 0.0234, "step": 29180 }, { "epoch": 21.045421773612112, "grad_norm": 0.14562785625457764, "learning_rate": 4.0341993801640155e-05, "loss": 0.0298, "step": 29190 }, { "epoch": 21.05263157894737, "grad_norm": 0.15768815577030182, "learning_rate": 4.030954930779681e-05, "loss": 0.027, "step": 29200 }, { "epoch": 21.059841384282624, "grad_norm": 0.14392223954200745, "learning_rate": 4.027710905288582e-05, "loss": 0.0318, "step": 29210 }, { "epoch": 21.06705118961788, "grad_norm": 0.12204724550247192, "learning_rate": 4.0244673051097666e-05, "loss": 0.0267, "step": 29220 }, { "epoch": 21.074260994953136, "grad_norm": 0.131956547498703, "learning_rate": 4.021224131662095e-05, "loss": 0.0313, "step": 29230 }, { "epoch": 21.081470800288393, "grad_norm": 0.12681418657302856, "learning_rate": 4.0179813863642396e-05, "loss": 0.0268, "step": 29240 }, { "epoch": 21.088680605623647, "grad_norm": 0.1388396918773651, "learning_rate": 4.014739070634691e-05, "loss": 0.0306, "step": 29250 }, { "epoch": 21.095890410958905, "grad_norm": 0.17361922562122345, "learning_rate": 4.011497185891746e-05, "loss": 0.0282, "step": 29260 }, { "epoch": 21.10310021629416, "grad_norm": 0.09854211658239365, "learning_rate": 4.008255733553513e-05, "loss": 0.0293, "step": 29270 }, { "epoch": 21.110310021629417, "grad_norm": 0.1549656093120575, "learning_rate": 4.0050147150379194e-05, "loss": 0.0319, "step": 29280 }, { "epoch": 21.11751982696467, "grad_norm": 0.08535530418157578, "learning_rate": 4.001774131762692e-05, "loss": 0.0296, "step": 29290 }, { "epoch": 21.12472963229993, "grad_norm": 0.1256651133298874, "learning_rate": 3.9985339851453705e-05, "loss": 0.03, "step": 29300 }, { "epoch": 21.131939437635182, "grad_norm": 0.16189393401145935, "learning_rate": 3.9952942766033114e-05, "loss": 0.0251, "step": 29310 }, { "epoch": 21.13914924297044, "grad_norm": 0.1420557051897049, "learning_rate": 3.9920550075536686e-05, "loss": 0.0296, "step": 29320 }, { "epoch": 21.146359048305694, "grad_norm": 0.11949613690376282, "learning_rate": 3.988816179413408e-05, "loss": 0.0282, "step": 29330 }, { "epoch": 21.15356885364095, "grad_norm": 0.1171443983912468, "learning_rate": 3.985577793599307e-05, "loss": 0.031, "step": 29340 }, { "epoch": 21.16077865897621, "grad_norm": 0.16321265697479248, "learning_rate": 3.982339851527942e-05, "loss": 0.0286, "step": 29350 }, { "epoch": 21.167988464311463, "grad_norm": 0.10974498093128204, "learning_rate": 3.979102354615701e-05, "loss": 0.0275, "step": 29360 }, { "epoch": 21.17519826964672, "grad_norm": 0.17116934061050415, "learning_rate": 3.975865304278772e-05, "loss": 0.0317, "step": 29370 }, { "epoch": 21.182408074981975, "grad_norm": 0.1460031419992447, "learning_rate": 3.9726287019331546e-05, "loss": 0.0221, "step": 29380 }, { "epoch": 21.189617880317233, "grad_norm": 0.1183817908167839, "learning_rate": 3.9693925489946474e-05, "loss": 0.0244, "step": 29390 }, { "epoch": 21.196827685652487, "grad_norm": 0.11949519068002701, "learning_rate": 3.966156846878852e-05, "loss": 0.027, "step": 29400 }, { "epoch": 21.204037490987744, "grad_norm": 0.1577204465866089, "learning_rate": 3.962921597001177e-05, "loss": 0.0326, "step": 29410 }, { "epoch": 21.211247296323, "grad_norm": 0.17035922408103943, "learning_rate": 3.9596868007768294e-05, "loss": 0.0264, "step": 29420 }, { "epoch": 21.218457101658256, "grad_norm": 0.15051868557929993, "learning_rate": 3.9564524596208165e-05, "loss": 0.0301, "step": 29430 }, { "epoch": 21.22566690699351, "grad_norm": 0.16049538552761078, "learning_rate": 3.953218574947954e-05, "loss": 0.0283, "step": 29440 }, { "epoch": 21.232876712328768, "grad_norm": 0.1595298796892166, "learning_rate": 3.949985148172849e-05, "loss": 0.0311, "step": 29450 }, { "epoch": 21.24008651766402, "grad_norm": 0.1315220296382904, "learning_rate": 3.946752180709914e-05, "loss": 0.0254, "step": 29460 }, { "epoch": 21.24729632299928, "grad_norm": 0.17160168290138245, "learning_rate": 3.943519673973357e-05, "loss": 0.0322, "step": 29470 }, { "epoch": 21.254506128334533, "grad_norm": 0.09377387166023254, "learning_rate": 3.940287629377189e-05, "loss": 0.0284, "step": 29480 }, { "epoch": 21.26171593366979, "grad_norm": 0.13550354540348053, "learning_rate": 3.937056048335214e-05, "loss": 0.0286, "step": 29490 }, { "epoch": 21.26892573900505, "grad_norm": 0.13093405961990356, "learning_rate": 3.933824932261037e-05, "loss": 0.0298, "step": 29500 }, { "epoch": 21.276135544340303, "grad_norm": 0.12238994240760803, "learning_rate": 3.9305942825680576e-05, "loss": 0.024, "step": 29510 }, { "epoch": 21.28334534967556, "grad_norm": 0.11019197106361389, "learning_rate": 3.9273641006694706e-05, "loss": 0.0286, "step": 29520 }, { "epoch": 21.290555155010814, "grad_norm": 0.13689535856246948, "learning_rate": 3.924134387978266e-05, "loss": 0.0284, "step": 29530 }, { "epoch": 21.297764960346072, "grad_norm": 0.20211510360240936, "learning_rate": 3.920905145907234e-05, "loss": 0.0318, "step": 29540 }, { "epoch": 21.304974765681326, "grad_norm": 0.1302235722541809, "learning_rate": 3.9176763758689525e-05, "loss": 0.0238, "step": 29550 }, { "epoch": 21.312184571016584, "grad_norm": 0.12037567049264908, "learning_rate": 3.9144480792757925e-05, "loss": 0.0282, "step": 29560 }, { "epoch": 21.319394376351838, "grad_norm": 0.09507909417152405, "learning_rate": 3.911220257539926e-05, "loss": 0.026, "step": 29570 }, { "epoch": 21.326604181687095, "grad_norm": 0.12336663156747818, "learning_rate": 3.907992912073308e-05, "loss": 0.0249, "step": 29580 }, { "epoch": 21.33381398702235, "grad_norm": 0.12465252727270126, "learning_rate": 3.90476604428769e-05, "loss": 0.0271, "step": 29590 }, { "epoch": 21.341023792357607, "grad_norm": 0.12527967989444733, "learning_rate": 3.901539655594614e-05, "loss": 0.0314, "step": 29600 }, { "epoch": 21.34823359769286, "grad_norm": 0.12091535329818726, "learning_rate": 3.898313747405413e-05, "loss": 0.0275, "step": 29610 }, { "epoch": 21.35544340302812, "grad_norm": 0.13700523972511292, "learning_rate": 3.895088321131205e-05, "loss": 0.0322, "step": 29620 }, { "epoch": 21.362653208363373, "grad_norm": 0.14077933132648468, "learning_rate": 3.891863378182907e-05, "loss": 0.0279, "step": 29630 }, { "epoch": 21.36986301369863, "grad_norm": 0.11024966835975647, "learning_rate": 3.8886389199712145e-05, "loss": 0.0265, "step": 29640 }, { "epoch": 21.377072819033884, "grad_norm": 0.14552263915538788, "learning_rate": 3.885414947906614e-05, "loss": 0.0251, "step": 29650 }, { "epoch": 21.384282624369142, "grad_norm": 0.18635110557079315, "learning_rate": 3.882191463399385e-05, "loss": 0.0283, "step": 29660 }, { "epoch": 21.3914924297044, "grad_norm": 0.15548813343048096, "learning_rate": 3.878968467859585e-05, "loss": 0.0284, "step": 29670 }, { "epoch": 21.398702235039654, "grad_norm": 0.1341448873281479, "learning_rate": 3.875745962697063e-05, "loss": 0.0303, "step": 29680 }, { "epoch": 21.40591204037491, "grad_norm": 0.13332337141036987, "learning_rate": 3.872523949321454e-05, "loss": 0.029, "step": 29690 }, { "epoch": 21.413121845710165, "grad_norm": 0.15364061295986176, "learning_rate": 3.869302429142173e-05, "loss": 0.0303, "step": 29700 }, { "epoch": 21.420331651045423, "grad_norm": 0.1133805587887764, "learning_rate": 3.866081403568425e-05, "loss": 0.0251, "step": 29710 }, { "epoch": 21.427541456380677, "grad_norm": 0.12758837640285492, "learning_rate": 3.8628608740091934e-05, "loss": 0.0258, "step": 29720 }, { "epoch": 21.434751261715935, "grad_norm": 0.1571931093931198, "learning_rate": 3.85964084187325e-05, "loss": 0.0293, "step": 29730 }, { "epoch": 21.44196106705119, "grad_norm": 0.18416330218315125, "learning_rate": 3.856421308569146e-05, "loss": 0.0279, "step": 29740 }, { "epoch": 21.449170872386446, "grad_norm": 0.15897950530052185, "learning_rate": 3.853202275505212e-05, "loss": 0.0263, "step": 29750 }, { "epoch": 21.4563806777217, "grad_norm": 0.17581364512443542, "learning_rate": 3.849983744089565e-05, "loss": 0.0312, "step": 29760 }, { "epoch": 21.463590483056958, "grad_norm": 0.09534513205289841, "learning_rate": 3.8467657157301e-05, "loss": 0.0271, "step": 29770 }, { "epoch": 21.470800288392212, "grad_norm": 0.12733234465122223, "learning_rate": 3.8435481918344884e-05, "loss": 0.0272, "step": 29780 }, { "epoch": 21.47801009372747, "grad_norm": 0.1124878078699112, "learning_rate": 3.84033117381019e-05, "loss": 0.0265, "step": 29790 }, { "epoch": 21.485219899062724, "grad_norm": 0.11293993145227432, "learning_rate": 3.8371146630644335e-05, "loss": 0.0304, "step": 29800 }, { "epoch": 21.49242970439798, "grad_norm": 0.12351049482822418, "learning_rate": 3.833898661004231e-05, "loss": 0.0308, "step": 29810 }, { "epoch": 21.49963950973324, "grad_norm": 0.19617973268032074, "learning_rate": 3.830683169036372e-05, "loss": 0.0314, "step": 29820 }, { "epoch": 21.506849315068493, "grad_norm": 0.12962515652179718, "learning_rate": 3.827468188567422e-05, "loss": 0.0293, "step": 29830 }, { "epoch": 21.51405912040375, "grad_norm": 0.164401575922966, "learning_rate": 3.82425372100372e-05, "loss": 0.0263, "step": 29840 }, { "epoch": 21.521268925739005, "grad_norm": 0.17428793013095856, "learning_rate": 3.821039767751387e-05, "loss": 0.0291, "step": 29850 }, { "epoch": 21.528478731074262, "grad_norm": 0.13162925839424133, "learning_rate": 3.817826330216315e-05, "loss": 0.028, "step": 29860 }, { "epoch": 21.535688536409516, "grad_norm": 0.2094685286283493, "learning_rate": 3.814613409804169e-05, "loss": 0.0304, "step": 29870 }, { "epoch": 21.542898341744774, "grad_norm": 0.1266988217830658, "learning_rate": 3.811401007920388e-05, "loss": 0.0312, "step": 29880 }, { "epoch": 21.550108147080028, "grad_norm": 0.12017539143562317, "learning_rate": 3.8081891259701895e-05, "loss": 0.0309, "step": 29890 }, { "epoch": 21.557317952415286, "grad_norm": 0.13681189715862274, "learning_rate": 3.8049777653585584e-05, "loss": 0.0231, "step": 29900 }, { "epoch": 21.56452775775054, "grad_norm": 0.1241372674703598, "learning_rate": 3.80176692749025e-05, "loss": 0.0266, "step": 29910 }, { "epoch": 21.571737563085797, "grad_norm": 0.1586608588695526, "learning_rate": 3.798556613769799e-05, "loss": 0.0289, "step": 29920 }, { "epoch": 21.57894736842105, "grad_norm": 0.13113677501678467, "learning_rate": 3.795346825601502e-05, "loss": 0.0241, "step": 29930 }, { "epoch": 21.58615717375631, "grad_norm": 0.21091893315315247, "learning_rate": 3.7921375643894305e-05, "loss": 0.0271, "step": 29940 }, { "epoch": 21.593366979091563, "grad_norm": 0.13115927577018738, "learning_rate": 3.788928831537424e-05, "loss": 0.0311, "step": 29950 }, { "epoch": 21.60057678442682, "grad_norm": 0.1548406034708023, "learning_rate": 3.7857206284490934e-05, "loss": 0.0284, "step": 29960 }, { "epoch": 21.607786589762078, "grad_norm": 0.09853696078062057, "learning_rate": 3.7825129565278114e-05, "loss": 0.0305, "step": 29970 }, { "epoch": 21.614996395097332, "grad_norm": 0.18228186666965485, "learning_rate": 3.779305817176728e-05, "loss": 0.0256, "step": 29980 }, { "epoch": 21.62220620043259, "grad_norm": 0.16098661720752716, "learning_rate": 3.776099211798751e-05, "loss": 0.0288, "step": 29990 }, { "epoch": 21.629416005767844, "grad_norm": 0.15465790033340454, "learning_rate": 3.7728931417965584e-05, "loss": 0.0273, "step": 30000 }, { "epoch": 21.6366258111031, "grad_norm": 0.1342528611421585, "learning_rate": 3.769687608572598e-05, "loss": 0.0282, "step": 30010 }, { "epoch": 21.643835616438356, "grad_norm": 0.10331156104803085, "learning_rate": 3.766482613529076e-05, "loss": 0.0315, "step": 30020 }, { "epoch": 21.651045421773613, "grad_norm": 0.17096005380153656, "learning_rate": 3.763278158067964e-05, "loss": 0.0284, "step": 30030 }, { "epoch": 21.658255227108867, "grad_norm": 0.14079590141773224, "learning_rate": 3.7600742435910045e-05, "loss": 0.031, "step": 30040 }, { "epoch": 21.665465032444125, "grad_norm": 0.1455780267715454, "learning_rate": 3.756870871499696e-05, "loss": 0.0296, "step": 30050 }, { "epoch": 21.67267483777938, "grad_norm": 0.14664942026138306, "learning_rate": 3.7536680431953025e-05, "loss": 0.0282, "step": 30060 }, { "epoch": 21.679884643114637, "grad_norm": 0.18156255781650543, "learning_rate": 3.7504657600788485e-05, "loss": 0.0249, "step": 30070 }, { "epoch": 21.68709444844989, "grad_norm": 0.155909463763237, "learning_rate": 3.747264023551125e-05, "loss": 0.0254, "step": 30080 }, { "epoch": 21.69430425378515, "grad_norm": 0.14591626822948456, "learning_rate": 3.744062835012678e-05, "loss": 0.0263, "step": 30090 }, { "epoch": 21.701514059120402, "grad_norm": 0.11995352059602737, "learning_rate": 3.740862195863814e-05, "loss": 0.0286, "step": 30100 }, { "epoch": 21.70872386445566, "grad_norm": 0.16430726647377014, "learning_rate": 3.737662107504606e-05, "loss": 0.0275, "step": 30110 }, { "epoch": 21.715933669790914, "grad_norm": 0.15133686363697052, "learning_rate": 3.734462571334879e-05, "loss": 0.0293, "step": 30120 }, { "epoch": 21.72314347512617, "grad_norm": 0.11875874549150467, "learning_rate": 3.731263588754217e-05, "loss": 0.0288, "step": 30130 }, { "epoch": 21.73035328046143, "grad_norm": 0.126652792096138, "learning_rate": 3.7280651611619665e-05, "loss": 0.029, "step": 30140 }, { "epoch": 21.737563085796683, "grad_norm": 0.20490235090255737, "learning_rate": 3.724867289957229e-05, "loss": 0.0343, "step": 30150 }, { "epoch": 21.74477289113194, "grad_norm": 0.18918561935424805, "learning_rate": 3.7216699765388576e-05, "loss": 0.0319, "step": 30160 }, { "epoch": 21.751982696467195, "grad_norm": 0.1866980940103531, "learning_rate": 3.71847322230547e-05, "loss": 0.0314, "step": 30170 }, { "epoch": 21.759192501802453, "grad_norm": 0.1415598839521408, "learning_rate": 3.7152770286554335e-05, "loss": 0.0315, "step": 30180 }, { "epoch": 21.766402307137707, "grad_norm": 0.20079316198825836, "learning_rate": 3.712081396986872e-05, "loss": 0.03, "step": 30190 }, { "epoch": 21.773612112472964, "grad_norm": 0.13433139026165009, "learning_rate": 3.708886328697663e-05, "loss": 0.0347, "step": 30200 }, { "epoch": 21.78082191780822, "grad_norm": 0.13382872939109802, "learning_rate": 3.7056918251854395e-05, "loss": 0.0301, "step": 30210 }, { "epoch": 21.788031723143476, "grad_norm": 0.11723022162914276, "learning_rate": 3.7024978878475846e-05, "loss": 0.0247, "step": 30220 }, { "epoch": 21.79524152847873, "grad_norm": 0.17582888901233673, "learning_rate": 3.699304518081233e-05, "loss": 0.03, "step": 30230 }, { "epoch": 21.802451333813988, "grad_norm": 0.15244628489017487, "learning_rate": 3.696111717283277e-05, "loss": 0.0348, "step": 30240 }, { "epoch": 21.80966113914924, "grad_norm": 0.16472454369068146, "learning_rate": 3.692919486850355e-05, "loss": 0.027, "step": 30250 }, { "epoch": 21.8168709444845, "grad_norm": 0.12666839361190796, "learning_rate": 3.689727828178854e-05, "loss": 0.0214, "step": 30260 }, { "epoch": 21.824080749819753, "grad_norm": 0.12139247357845306, "learning_rate": 3.6865367426649185e-05, "loss": 0.03, "step": 30270 }, { "epoch": 21.83129055515501, "grad_norm": 0.1058996245265007, "learning_rate": 3.6833462317044346e-05, "loss": 0.0274, "step": 30280 }, { "epoch": 21.838500360490265, "grad_norm": 0.21169210970401764, "learning_rate": 3.6801562966930406e-05, "loss": 0.0279, "step": 30290 }, { "epoch": 21.845710165825523, "grad_norm": 0.11817483603954315, "learning_rate": 3.676966939026123e-05, "loss": 0.0294, "step": 30300 }, { "epoch": 21.85291997116078, "grad_norm": 0.16546013951301575, "learning_rate": 3.6737781600988166e-05, "loss": 0.027, "step": 30310 }, { "epoch": 21.860129776496034, "grad_norm": 0.1326947659254074, "learning_rate": 3.670589961305998e-05, "loss": 0.0291, "step": 30320 }, { "epoch": 21.867339581831292, "grad_norm": 0.204656720161438, "learning_rate": 3.667402344042298e-05, "loss": 0.0265, "step": 30330 }, { "epoch": 21.874549387166546, "grad_norm": 0.15419742465019226, "learning_rate": 3.664215309702086e-05, "loss": 0.0285, "step": 30340 }, { "epoch": 21.881759192501804, "grad_norm": 0.10448312014341354, "learning_rate": 3.661028859679478e-05, "loss": 0.0295, "step": 30350 }, { "epoch": 21.888968997837058, "grad_norm": 0.1546034812927246, "learning_rate": 3.657842995368339e-05, "loss": 0.0251, "step": 30360 }, { "epoch": 21.896178803172315, "grad_norm": 0.15171106159687042, "learning_rate": 3.654657718162273e-05, "loss": 0.0227, "step": 30370 }, { "epoch": 21.90338860850757, "grad_norm": 0.11313994973897934, "learning_rate": 3.6514730294546275e-05, "loss": 0.0317, "step": 30380 }, { "epoch": 21.910598413842827, "grad_norm": 0.15972986817359924, "learning_rate": 3.648288930638492e-05, "loss": 0.0301, "step": 30390 }, { "epoch": 21.91780821917808, "grad_norm": 0.21954481303691864, "learning_rate": 3.645105423106704e-05, "loss": 0.0335, "step": 30400 }, { "epoch": 21.92501802451334, "grad_norm": 0.18884523212909698, "learning_rate": 3.6419225082518336e-05, "loss": 0.0314, "step": 30410 }, { "epoch": 21.932227829848593, "grad_norm": 0.12014813721179962, "learning_rate": 3.6387401874661975e-05, "loss": 0.0253, "step": 30420 }, { "epoch": 21.93943763518385, "grad_norm": 0.1476447582244873, "learning_rate": 3.6355584621418516e-05, "loss": 0.027, "step": 30430 }, { "epoch": 21.946647440519104, "grad_norm": 0.1537926346063614, "learning_rate": 3.632377333670591e-05, "loss": 0.0267, "step": 30440 }, { "epoch": 21.953857245854362, "grad_norm": 0.14537841081619263, "learning_rate": 3.629196803443946e-05, "loss": 0.0239, "step": 30450 }, { "epoch": 21.96106705118962, "grad_norm": 0.19136768579483032, "learning_rate": 3.626016872853193e-05, "loss": 0.0304, "step": 30460 }, { "epoch": 21.968276856524874, "grad_norm": 0.11793424934148788, "learning_rate": 3.6228375432893405e-05, "loss": 0.0291, "step": 30470 }, { "epoch": 21.97548666186013, "grad_norm": 0.16229695081710815, "learning_rate": 3.619658816143131e-05, "loss": 0.0347, "step": 30480 }, { "epoch": 21.982696467195385, "grad_norm": 0.1393337994813919, "learning_rate": 3.6164806928050544e-05, "loss": 0.0253, "step": 30490 }, { "epoch": 21.989906272530643, "grad_norm": 0.1718035191297531, "learning_rate": 3.613303174665327e-05, "loss": 0.0284, "step": 30500 }, { "epoch": 21.997116077865897, "grad_norm": 0.18482109904289246, "learning_rate": 3.6101262631138996e-05, "loss": 0.0271, "step": 30510 }, { "epoch": 22.004325883201155, "grad_norm": 0.13090455532073975, "learning_rate": 3.606949959540467e-05, "loss": 0.0298, "step": 30520 }, { "epoch": 22.01153568853641, "grad_norm": 0.16511261463165283, "learning_rate": 3.603774265334449e-05, "loss": 0.0257, "step": 30530 }, { "epoch": 22.018745493871666, "grad_norm": 0.19995437562465668, "learning_rate": 3.600599181885003e-05, "loss": 0.0321, "step": 30540 }, { "epoch": 22.02595529920692, "grad_norm": 0.14847224950790405, "learning_rate": 3.597424710581018e-05, "loss": 0.0308, "step": 30550 }, { "epoch": 22.033165104542178, "grad_norm": 0.14359402656555176, "learning_rate": 3.5942508528111164e-05, "loss": 0.0278, "step": 30560 }, { "epoch": 22.040374909877432, "grad_norm": 0.1771557629108429, "learning_rate": 3.591077609963651e-05, "loss": 0.0306, "step": 30570 }, { "epoch": 22.04758471521269, "grad_norm": 0.1016196459531784, "learning_rate": 3.587904983426704e-05, "loss": 0.025, "step": 30580 }, { "epoch": 22.054794520547944, "grad_norm": 0.1471693515777588, "learning_rate": 3.584732974588094e-05, "loss": 0.0332, "step": 30590 }, { "epoch": 22.0620043258832, "grad_norm": 0.12488993257284164, "learning_rate": 3.5815615848353645e-05, "loss": 0.0257, "step": 30600 }, { "epoch": 22.06921413121846, "grad_norm": 0.168031707406044, "learning_rate": 3.578390815555784e-05, "loss": 0.0275, "step": 30610 }, { "epoch": 22.076423936553713, "grad_norm": 0.10683932900428772, "learning_rate": 3.5752206681363626e-05, "loss": 0.0352, "step": 30620 }, { "epoch": 22.08363374188897, "grad_norm": 0.15790867805480957, "learning_rate": 3.572051143963826e-05, "loss": 0.0275, "step": 30630 }, { "epoch": 22.090843547224225, "grad_norm": 0.20937779545783997, "learning_rate": 3.5688822444246294e-05, "loss": 0.0224, "step": 30640 }, { "epoch": 22.098053352559482, "grad_norm": 0.1199362576007843, "learning_rate": 3.565713970904963e-05, "loss": 0.023, "step": 30650 }, { "epoch": 22.105263157894736, "grad_norm": 0.15026149153709412, "learning_rate": 3.562546324790733e-05, "loss": 0.0263, "step": 30660 }, { "epoch": 22.112472963229994, "grad_norm": 0.16610834002494812, "learning_rate": 3.559379307467577e-05, "loss": 0.0286, "step": 30670 }, { "epoch": 22.119682768565248, "grad_norm": 0.12083172798156738, "learning_rate": 3.556212920320856e-05, "loss": 0.0276, "step": 30680 }, { "epoch": 22.126892573900506, "grad_norm": 0.1456412374973297, "learning_rate": 3.553047164735655e-05, "loss": 0.028, "step": 30690 }, { "epoch": 22.13410237923576, "grad_norm": 0.14865298569202423, "learning_rate": 3.549882042096783e-05, "loss": 0.0281, "step": 30700 }, { "epoch": 22.141312184571017, "grad_norm": 0.11248129606246948, "learning_rate": 3.546717553788772e-05, "loss": 0.0316, "step": 30710 }, { "epoch": 22.14852198990627, "grad_norm": 0.1898188591003418, "learning_rate": 3.543553701195879e-05, "loss": 0.0284, "step": 30720 }, { "epoch": 22.15573179524153, "grad_norm": 0.16147582232952118, "learning_rate": 3.540390485702077e-05, "loss": 0.0299, "step": 30730 }, { "epoch": 22.162941600576783, "grad_norm": 0.15974076092243195, "learning_rate": 3.5372279086910645e-05, "loss": 0.0342, "step": 30740 }, { "epoch": 22.17015140591204, "grad_norm": 0.19261033833026886, "learning_rate": 3.5340659715462635e-05, "loss": 0.029, "step": 30750 }, { "epoch": 22.177361211247295, "grad_norm": 0.17133170366287231, "learning_rate": 3.530904675650808e-05, "loss": 0.0283, "step": 30760 }, { "epoch": 22.184571016582552, "grad_norm": 0.16368667781352997, "learning_rate": 3.527744022387559e-05, "loss": 0.0257, "step": 30770 }, { "epoch": 22.19178082191781, "grad_norm": 0.13598035275936127, "learning_rate": 3.524584013139094e-05, "loss": 0.028, "step": 30780 }, { "epoch": 22.198990627253064, "grad_norm": 0.17131146788597107, "learning_rate": 3.521424649287708e-05, "loss": 0.0319, "step": 30790 }, { "epoch": 22.20620043258832, "grad_norm": 0.22404855489730835, "learning_rate": 3.518265932215412e-05, "loss": 0.0276, "step": 30800 }, { "epoch": 22.213410237923576, "grad_norm": 0.22067420184612274, "learning_rate": 3.51510786330394e-05, "loss": 0.0307, "step": 30810 }, { "epoch": 22.220620043258833, "grad_norm": 0.13919714093208313, "learning_rate": 3.511950443934736e-05, "loss": 0.0244, "step": 30820 }, { "epoch": 22.227829848594087, "grad_norm": 0.11460370570421219, "learning_rate": 3.508793675488961e-05, "loss": 0.0229, "step": 30830 }, { "epoch": 22.235039653929345, "grad_norm": 0.1249944418668747, "learning_rate": 3.505637559347498e-05, "loss": 0.0298, "step": 30840 }, { "epoch": 22.2422494592646, "grad_norm": 0.13281095027923584, "learning_rate": 3.5024820968909345e-05, "loss": 0.0336, "step": 30850 }, { "epoch": 22.249459264599857, "grad_norm": 0.1257549673318863, "learning_rate": 3.4993272894995774e-05, "loss": 0.0284, "step": 30860 }, { "epoch": 22.25666906993511, "grad_norm": 0.16124355792999268, "learning_rate": 3.49617313855345e-05, "loss": 0.0271, "step": 30870 }, { "epoch": 22.26387887527037, "grad_norm": 0.1667853593826294, "learning_rate": 3.4930196454322834e-05, "loss": 0.0299, "step": 30880 }, { "epoch": 22.271088680605622, "grad_norm": 0.11302752792835236, "learning_rate": 3.48986681151552e-05, "loss": 0.0295, "step": 30890 }, { "epoch": 22.27829848594088, "grad_norm": 0.08990278840065002, "learning_rate": 3.4867146381823204e-05, "loss": 0.0234, "step": 30900 }, { "epoch": 22.285508291276134, "grad_norm": 0.14244623482227325, "learning_rate": 3.48356312681155e-05, "loss": 0.027, "step": 30910 }, { "epoch": 22.29271809661139, "grad_norm": 0.10094787925481796, "learning_rate": 3.480412278781788e-05, "loss": 0.026, "step": 30920 }, { "epoch": 22.299927901946646, "grad_norm": 0.11401179432868958, "learning_rate": 3.47726209547132e-05, "loss": 0.0276, "step": 30930 }, { "epoch": 22.307137707281903, "grad_norm": 0.14043380320072174, "learning_rate": 3.474112578258147e-05, "loss": 0.0221, "step": 30940 }, { "epoch": 22.31434751261716, "grad_norm": 0.14814814925193787, "learning_rate": 3.470963728519974e-05, "loss": 0.0251, "step": 30950 }, { "epoch": 22.321557317952415, "grad_norm": 0.1908431500196457, "learning_rate": 3.467815547634211e-05, "loss": 0.0276, "step": 30960 }, { "epoch": 22.328767123287673, "grad_norm": 0.13805760443210602, "learning_rate": 3.4646680369779836e-05, "loss": 0.0283, "step": 30970 }, { "epoch": 22.335976928622927, "grad_norm": 0.2046598196029663, "learning_rate": 3.46152119792812e-05, "loss": 0.0289, "step": 30980 }, { "epoch": 22.343186733958184, "grad_norm": 0.16160447895526886, "learning_rate": 3.45837503186115e-05, "loss": 0.0312, "step": 30990 }, { "epoch": 22.35039653929344, "grad_norm": 0.1353575587272644, "learning_rate": 3.455229540153318e-05, "loss": 0.0242, "step": 31000 }, { "epoch": 22.357606344628696, "grad_norm": 0.13804125785827637, "learning_rate": 3.4520847241805665e-05, "loss": 0.0338, "step": 31010 }, { "epoch": 22.36481614996395, "grad_norm": 0.17077140510082245, "learning_rate": 3.4489405853185465e-05, "loss": 0.028, "step": 31020 }, { "epoch": 22.372025955299208, "grad_norm": 0.18935489654541016, "learning_rate": 3.44579712494261e-05, "loss": 0.0269, "step": 31030 }, { "epoch": 22.37923576063446, "grad_norm": 0.1410508006811142, "learning_rate": 3.442654344427814e-05, "loss": 0.0288, "step": 31040 }, { "epoch": 22.38644556596972, "grad_norm": 0.19774727523326874, "learning_rate": 3.439512245148916e-05, "loss": 0.0313, "step": 31050 }, { "epoch": 22.393655371304973, "grad_norm": 0.13622960448265076, "learning_rate": 3.4363708284803796e-05, "loss": 0.0293, "step": 31060 }, { "epoch": 22.40086517664023, "grad_norm": 0.13466550409793854, "learning_rate": 3.433230095796367e-05, "loss": 0.0281, "step": 31070 }, { "epoch": 22.408074981975485, "grad_norm": 0.1622372716665268, "learning_rate": 3.430090048470739e-05, "loss": 0.032, "step": 31080 }, { "epoch": 22.415284787310743, "grad_norm": 0.15631771087646484, "learning_rate": 3.4269506878770585e-05, "loss": 0.0321, "step": 31090 }, { "epoch": 22.422494592646, "grad_norm": 0.11519571393728256, "learning_rate": 3.4238120153885925e-05, "loss": 0.0288, "step": 31100 }, { "epoch": 22.429704397981254, "grad_norm": 0.1352016031742096, "learning_rate": 3.4206740323782994e-05, "loss": 0.0298, "step": 31110 }, { "epoch": 22.436914203316512, "grad_norm": 0.19617749750614166, "learning_rate": 3.4175367402188395e-05, "loss": 0.0305, "step": 31120 }, { "epoch": 22.444124008651766, "grad_norm": 0.19492687284946442, "learning_rate": 3.414400140282574e-05, "loss": 0.0322, "step": 31130 }, { "epoch": 22.451333813987024, "grad_norm": 0.180286705493927, "learning_rate": 3.411264233941556e-05, "loss": 0.0263, "step": 31140 }, { "epoch": 22.458543619322278, "grad_norm": 0.11338555812835693, "learning_rate": 3.408129022567537e-05, "loss": 0.0323, "step": 31150 }, { "epoch": 22.465753424657535, "grad_norm": 0.15532393753528595, "learning_rate": 3.404994507531968e-05, "loss": 0.0299, "step": 31160 }, { "epoch": 22.47296322999279, "grad_norm": 0.10535138100385666, "learning_rate": 3.40186069020599e-05, "loss": 0.0294, "step": 31170 }, { "epoch": 22.480173035328047, "grad_norm": 0.14704953134059906, "learning_rate": 3.3987275719604386e-05, "loss": 0.0239, "step": 31180 }, { "epoch": 22.4873828406633, "grad_norm": 0.14578837156295776, "learning_rate": 3.3955951541658526e-05, "loss": 0.0276, "step": 31190 }, { "epoch": 22.49459264599856, "grad_norm": 0.13318315148353577, "learning_rate": 3.392463438192454e-05, "loss": 0.0242, "step": 31200 }, { "epoch": 22.501802451333813, "grad_norm": 0.16679930686950684, "learning_rate": 3.38933242541016e-05, "loss": 0.0275, "step": 31210 }, { "epoch": 22.50901225666907, "grad_norm": 0.11721526086330414, "learning_rate": 3.386202117188586e-05, "loss": 0.0246, "step": 31220 }, { "epoch": 22.516222062004324, "grad_norm": 0.15334877371788025, "learning_rate": 3.383072514897033e-05, "loss": 0.028, "step": 31230 }, { "epoch": 22.523431867339582, "grad_norm": 0.1950264722108841, "learning_rate": 3.379943619904494e-05, "loss": 0.0278, "step": 31240 }, { "epoch": 22.53064167267484, "grad_norm": 0.15950369834899902, "learning_rate": 3.3768154335796583e-05, "loss": 0.0273, "step": 31250 }, { "epoch": 22.537851478010094, "grad_norm": 0.15657880902290344, "learning_rate": 3.3736879572908963e-05, "loss": 0.0261, "step": 31260 }, { "epoch": 22.54506128334535, "grad_norm": 0.0946565717458725, "learning_rate": 3.370561192406276e-05, "loss": 0.024, "step": 31270 }, { "epoch": 22.552271088680605, "grad_norm": 0.12894916534423828, "learning_rate": 3.367435140293547e-05, "loss": 0.028, "step": 31280 }, { "epoch": 22.559480894015863, "grad_norm": 0.10030395537614822, "learning_rate": 3.3643098023201545e-05, "loss": 0.0242, "step": 31290 }, { "epoch": 22.566690699351117, "grad_norm": 0.18164803087711334, "learning_rate": 3.361185179853225e-05, "loss": 0.0276, "step": 31300 }, { "epoch": 22.573900504686375, "grad_norm": 0.14105089008808136, "learning_rate": 3.358061274259574e-05, "loss": 0.0272, "step": 31310 }, { "epoch": 22.58111031002163, "grad_norm": 0.11956893652677536, "learning_rate": 3.3549380869057064e-05, "loss": 0.0242, "step": 31320 }, { "epoch": 22.588320115356886, "grad_norm": 0.18018566071987152, "learning_rate": 3.351815619157809e-05, "loss": 0.0294, "step": 31330 }, { "epoch": 22.59552992069214, "grad_norm": 0.1329498589038849, "learning_rate": 3.348693872381754e-05, "loss": 0.0269, "step": 31340 }, { "epoch": 22.602739726027398, "grad_norm": 0.13772007822990417, "learning_rate": 3.3455728479431025e-05, "loss": 0.0231, "step": 31350 }, { "epoch": 22.609949531362652, "grad_norm": 0.1291014403104782, "learning_rate": 3.342452547207094e-05, "loss": 0.0232, "step": 31360 }, { "epoch": 22.61715933669791, "grad_norm": 0.1643933355808258, "learning_rate": 3.3393329715386555e-05, "loss": 0.0297, "step": 31370 }, { "epoch": 22.624369142033164, "grad_norm": 0.1851561963558197, "learning_rate": 3.336214122302395e-05, "loss": 0.0213, "step": 31380 }, { "epoch": 22.63157894736842, "grad_norm": 0.13725951313972473, "learning_rate": 3.333096000862604e-05, "loss": 0.0296, "step": 31390 }, { "epoch": 22.638788752703675, "grad_norm": 0.15709558129310608, "learning_rate": 3.3299786085832516e-05, "loss": 0.0264, "step": 31400 }, { "epoch": 22.645998558038933, "grad_norm": 0.11716639995574951, "learning_rate": 3.3268619468279956e-05, "loss": 0.0262, "step": 31410 }, { "epoch": 22.65320836337419, "grad_norm": 0.07269756495952606, "learning_rate": 3.323746016960167e-05, "loss": 0.0303, "step": 31420 }, { "epoch": 22.660418168709445, "grad_norm": 0.09342976659536362, "learning_rate": 3.3206308203427806e-05, "loss": 0.0229, "step": 31430 }, { "epoch": 22.667627974044702, "grad_norm": 0.15859849750995636, "learning_rate": 3.3175163583385254e-05, "loss": 0.0258, "step": 31440 }, { "epoch": 22.674837779379956, "grad_norm": 0.15506531298160553, "learning_rate": 3.314402632309779e-05, "loss": 0.0265, "step": 31450 }, { "epoch": 22.682047584715214, "grad_norm": 0.16233006119728088, "learning_rate": 3.311289643618587e-05, "loss": 0.0265, "step": 31460 }, { "epoch": 22.689257390050468, "grad_norm": 0.1647736132144928, "learning_rate": 3.308177393626676e-05, "loss": 0.0267, "step": 31470 }, { "epoch": 22.696467195385726, "grad_norm": 0.14278601109981537, "learning_rate": 3.305065883695453e-05, "loss": 0.0223, "step": 31480 }, { "epoch": 22.70367700072098, "grad_norm": 0.13122569024562836, "learning_rate": 3.301955115185996e-05, "loss": 0.0284, "step": 31490 }, { "epoch": 22.710886806056237, "grad_norm": 0.13822036981582642, "learning_rate": 3.29884508945906e-05, "loss": 0.0248, "step": 31500 }, { "epoch": 22.71809661139149, "grad_norm": 0.09299978613853455, "learning_rate": 3.295735807875079e-05, "loss": 0.0315, "step": 31510 }, { "epoch": 22.72530641672675, "grad_norm": 0.13919450342655182, "learning_rate": 3.292627271794156e-05, "loss": 0.0289, "step": 31520 }, { "epoch": 22.732516222062003, "grad_norm": 0.17894449830055237, "learning_rate": 3.289519482576069e-05, "loss": 0.0243, "step": 31530 }, { "epoch": 22.73972602739726, "grad_norm": 0.21210680902004242, "learning_rate": 3.286412441580274e-05, "loss": 0.0275, "step": 31540 }, { "epoch": 22.746935832732515, "grad_norm": 0.1595909744501114, "learning_rate": 3.283306150165896e-05, "loss": 0.0312, "step": 31550 }, { "epoch": 22.754145638067772, "grad_norm": 0.14844033122062683, "learning_rate": 3.2802006096917285e-05, "loss": 0.0243, "step": 31560 }, { "epoch": 22.761355443403026, "grad_norm": 0.17294853925704956, "learning_rate": 3.277095821516245e-05, "loss": 0.0321, "step": 31570 }, { "epoch": 22.768565248738284, "grad_norm": 0.16505767405033112, "learning_rate": 3.273991786997583e-05, "loss": 0.0296, "step": 31580 }, { "epoch": 22.77577505407354, "grad_norm": 0.13434192538261414, "learning_rate": 3.2708885074935514e-05, "loss": 0.0314, "step": 31590 }, { "epoch": 22.782984859408796, "grad_norm": 0.0912645161151886, "learning_rate": 3.267785984361633e-05, "loss": 0.023, "step": 31600 }, { "epoch": 22.790194664744053, "grad_norm": 0.22448264062404633, "learning_rate": 3.2646842189589754e-05, "loss": 0.0392, "step": 31610 }, { "epoch": 22.797404470079307, "grad_norm": 0.1976124346256256, "learning_rate": 3.261583212642397e-05, "loss": 0.0316, "step": 31620 }, { "epoch": 22.804614275414565, "grad_norm": 0.1095246821641922, "learning_rate": 3.25848296676838e-05, "loss": 0.0244, "step": 31630 }, { "epoch": 22.81182408074982, "grad_norm": 0.16505733132362366, "learning_rate": 3.255383482693083e-05, "loss": 0.0271, "step": 31640 }, { "epoch": 22.819033886085077, "grad_norm": 0.12243007123470306, "learning_rate": 3.252284761772322e-05, "loss": 0.0269, "step": 31650 }, { "epoch": 22.82624369142033, "grad_norm": 0.11189734190702438, "learning_rate": 3.249186805361581e-05, "loss": 0.0249, "step": 31660 }, { "epoch": 22.833453496755588, "grad_norm": 0.15795069932937622, "learning_rate": 3.246089614816016e-05, "loss": 0.0271, "step": 31670 }, { "epoch": 22.840663302090842, "grad_norm": 0.13566243648529053, "learning_rate": 3.242993191490441e-05, "loss": 0.0319, "step": 31680 }, { "epoch": 22.8478731074261, "grad_norm": 0.16966557502746582, "learning_rate": 3.239897536739335e-05, "loss": 0.0269, "step": 31690 }, { "epoch": 22.855082912761354, "grad_norm": 0.1419772207736969, "learning_rate": 3.236802651916845e-05, "loss": 0.0283, "step": 31700 }, { "epoch": 22.86229271809661, "grad_norm": 0.18887530267238617, "learning_rate": 3.23370853837678e-05, "loss": 0.0279, "step": 31710 }, { "epoch": 22.869502523431866, "grad_norm": 0.14735285937786102, "learning_rate": 3.2306151974726056e-05, "loss": 0.0283, "step": 31720 }, { "epoch": 22.876712328767123, "grad_norm": 0.1751866638660431, "learning_rate": 3.227522630557459e-05, "loss": 0.0338, "step": 31730 }, { "epoch": 22.88392213410238, "grad_norm": 0.1277783364057541, "learning_rate": 3.224430838984132e-05, "loss": 0.0259, "step": 31740 }, { "epoch": 22.891131939437635, "grad_norm": 0.1488378793001175, "learning_rate": 3.2213398241050784e-05, "loss": 0.0274, "step": 31750 }, { "epoch": 22.898341744772893, "grad_norm": 0.1612502634525299, "learning_rate": 3.218249587272415e-05, "loss": 0.0303, "step": 31760 }, { "epoch": 22.905551550108147, "grad_norm": 0.15862730145454407, "learning_rate": 3.215160129837916e-05, "loss": 0.0281, "step": 31770 }, { "epoch": 22.912761355443404, "grad_norm": 0.14077310264110565, "learning_rate": 3.2120714531530146e-05, "loss": 0.0301, "step": 31780 }, { "epoch": 22.91997116077866, "grad_norm": 0.18073761463165283, "learning_rate": 3.2089835585688014e-05, "loss": 0.0321, "step": 31790 }, { "epoch": 22.927180966113916, "grad_norm": 0.12390346080064774, "learning_rate": 3.20589644743603e-05, "loss": 0.0287, "step": 31800 }, { "epoch": 22.93439077144917, "grad_norm": 0.1169382855296135, "learning_rate": 3.2028101211051054e-05, "loss": 0.0291, "step": 31810 }, { "epoch": 22.941600576784428, "grad_norm": 0.12834803760051727, "learning_rate": 3.19972458092609e-05, "loss": 0.0223, "step": 31820 }, { "epoch": 22.94881038211968, "grad_norm": 0.14180462062358856, "learning_rate": 3.196639828248707e-05, "loss": 0.0283, "step": 31830 }, { "epoch": 22.95602018745494, "grad_norm": 0.13362208008766174, "learning_rate": 3.193555864422329e-05, "loss": 0.0286, "step": 31840 }, { "epoch": 22.963229992790193, "grad_norm": 0.15205049514770508, "learning_rate": 3.19047269079599e-05, "loss": 0.0275, "step": 31850 }, { "epoch": 22.97043979812545, "grad_norm": 0.15070903301239014, "learning_rate": 3.1873903087183734e-05, "loss": 0.0299, "step": 31860 }, { "epoch": 22.977649603460705, "grad_norm": 0.1412709355354309, "learning_rate": 3.1843087195378176e-05, "loss": 0.0259, "step": 31870 }, { "epoch": 22.984859408795963, "grad_norm": 0.16254881024360657, "learning_rate": 3.181227924602312e-05, "loss": 0.0269, "step": 31880 }, { "epoch": 22.99206921413122, "grad_norm": 0.2123967558145523, "learning_rate": 3.178147925259506e-05, "loss": 0.0205, "step": 31890 }, { "epoch": 22.999279019466474, "grad_norm": 0.12927111983299255, "learning_rate": 3.175068722856693e-05, "loss": 0.0266, "step": 31900 }, { "epoch": 23.006488824801732, "grad_norm": 0.13976332545280457, "learning_rate": 3.17199031874082e-05, "loss": 0.0298, "step": 31910 }, { "epoch": 23.013698630136986, "grad_norm": 0.22610768675804138, "learning_rate": 3.168912714258487e-05, "loss": 0.0279, "step": 31920 }, { "epoch": 23.020908435472244, "grad_norm": 0.13467903435230255, "learning_rate": 3.165835910755944e-05, "loss": 0.0276, "step": 31930 }, { "epoch": 23.028118240807498, "grad_norm": 0.15392328798770905, "learning_rate": 3.1627599095790874e-05, "loss": 0.0269, "step": 31940 }, { "epoch": 23.035328046142755, "grad_norm": 0.14779776334762573, "learning_rate": 3.159684712073462e-05, "loss": 0.0292, "step": 31950 }, { "epoch": 23.04253785147801, "grad_norm": 0.12298551201820374, "learning_rate": 3.1566103195842695e-05, "loss": 0.027, "step": 31960 }, { "epoch": 23.049747656813267, "grad_norm": 0.15383794903755188, "learning_rate": 3.153536733456349e-05, "loss": 0.0326, "step": 31970 }, { "epoch": 23.05695746214852, "grad_norm": 0.17571423947811127, "learning_rate": 3.150463955034194e-05, "loss": 0.0297, "step": 31980 }, { "epoch": 23.06416726748378, "grad_norm": 0.19232743978500366, "learning_rate": 3.147391985661942e-05, "loss": 0.0269, "step": 31990 }, { "epoch": 23.071377072819033, "grad_norm": 0.12866316735744476, "learning_rate": 3.1443208266833755e-05, "loss": 0.0272, "step": 32000 }, { "epoch": 23.07858687815429, "grad_norm": 0.10494796186685562, "learning_rate": 3.141250479441924e-05, "loss": 0.0226, "step": 32010 }, { "epoch": 23.085796683489544, "grad_norm": 0.10277318209409714, "learning_rate": 3.138180945280664e-05, "loss": 0.0262, "step": 32020 }, { "epoch": 23.093006488824802, "grad_norm": 0.13416290283203125, "learning_rate": 3.1351122255423116e-05, "loss": 0.0245, "step": 32030 }, { "epoch": 23.100216294160056, "grad_norm": 0.18203170597553253, "learning_rate": 3.132044321569228e-05, "loss": 0.0278, "step": 32040 }, { "epoch": 23.107426099495314, "grad_norm": 0.16532237827777863, "learning_rate": 3.1289772347034226e-05, "loss": 0.0247, "step": 32050 }, { "epoch": 23.11463590483057, "grad_norm": 0.1281445473432541, "learning_rate": 3.1259109662865404e-05, "loss": 0.0265, "step": 32060 }, { "epoch": 23.121845710165825, "grad_norm": 0.1623126119375229, "learning_rate": 3.122845517659871e-05, "loss": 0.0246, "step": 32070 }, { "epoch": 23.129055515501083, "grad_norm": 0.12383629381656647, "learning_rate": 3.119780890164348e-05, "loss": 0.0264, "step": 32080 }, { "epoch": 23.136265320836337, "grad_norm": 0.15584920346736908, "learning_rate": 3.116717085140542e-05, "loss": 0.0266, "step": 32090 }, { "epoch": 23.143475126171595, "grad_norm": 0.1568782478570938, "learning_rate": 3.1136541039286657e-05, "loss": 0.0254, "step": 32100 }, { "epoch": 23.15068493150685, "grad_norm": 0.11357845366001129, "learning_rate": 3.110591947868571e-05, "loss": 0.0258, "step": 32110 }, { "epoch": 23.157894736842106, "grad_norm": 0.14639776945114136, "learning_rate": 3.107530618299751e-05, "loss": 0.028, "step": 32120 }, { "epoch": 23.16510454217736, "grad_norm": 0.1519240140914917, "learning_rate": 3.104470116561333e-05, "loss": 0.0288, "step": 32130 }, { "epoch": 23.172314347512618, "grad_norm": 0.15196217596530914, "learning_rate": 3.1014104439920835e-05, "loss": 0.0282, "step": 32140 }, { "epoch": 23.179524152847872, "grad_norm": 0.13643458485603333, "learning_rate": 3.098351601930411e-05, "loss": 0.0257, "step": 32150 }, { "epoch": 23.18673395818313, "grad_norm": 0.14562180638313293, "learning_rate": 3.0952935917143533e-05, "loss": 0.028, "step": 32160 }, { "epoch": 23.193943763518384, "grad_norm": 0.16213999688625336, "learning_rate": 3.092236414681588e-05, "loss": 0.0271, "step": 32170 }, { "epoch": 23.20115356885364, "grad_norm": 0.13068003952503204, "learning_rate": 3.089180072169431e-05, "loss": 0.0298, "step": 32180 }, { "epoch": 23.208363374188895, "grad_norm": 0.09922101348638535, "learning_rate": 3.08612456551483e-05, "loss": 0.0299, "step": 32190 }, { "epoch": 23.215573179524153, "grad_norm": 0.12450484931468964, "learning_rate": 3.083069896054363e-05, "loss": 0.0271, "step": 32200 }, { "epoch": 23.22278298485941, "grad_norm": 0.12407360225915909, "learning_rate": 3.080016065124253e-05, "loss": 0.0233, "step": 32210 }, { "epoch": 23.229992790194665, "grad_norm": 0.13048042356967926, "learning_rate": 3.076963074060344e-05, "loss": 0.0257, "step": 32220 }, { "epoch": 23.237202595529922, "grad_norm": 0.13343782722949982, "learning_rate": 3.07391092419812e-05, "loss": 0.0263, "step": 32230 }, { "epoch": 23.244412400865176, "grad_norm": 0.1845203936100006, "learning_rate": 3.070859616872695e-05, "loss": 0.0295, "step": 32240 }, { "epoch": 23.251622206200434, "grad_norm": 0.2610131502151489, "learning_rate": 3.067809153418816e-05, "loss": 0.0267, "step": 32250 }, { "epoch": 23.258832011535688, "grad_norm": 0.09900133311748505, "learning_rate": 3.0647595351708545e-05, "loss": 0.0262, "step": 32260 }, { "epoch": 23.266041816870946, "grad_norm": 0.1684826761484146, "learning_rate": 3.061710763462824e-05, "loss": 0.0241, "step": 32270 }, { "epoch": 23.2732516222062, "grad_norm": 0.07766503840684891, "learning_rate": 3.0586628396283564e-05, "loss": 0.0262, "step": 32280 }, { "epoch": 23.280461427541457, "grad_norm": 0.09103570878505707, "learning_rate": 3.0556157650007174e-05, "loss": 0.0275, "step": 32290 }, { "epoch": 23.28767123287671, "grad_norm": 0.17434062063694, "learning_rate": 3.052569540912801e-05, "loss": 0.028, "step": 32300 }, { "epoch": 23.29488103821197, "grad_norm": 0.1341964155435562, "learning_rate": 3.049524168697131e-05, "loss": 0.0238, "step": 32310 }, { "epoch": 23.302090843547223, "grad_norm": 0.17365816235542297, "learning_rate": 3.0464796496858538e-05, "loss": 0.0331, "step": 32320 }, { "epoch": 23.30930064888248, "grad_norm": 0.14227896928787231, "learning_rate": 3.0434359852107473e-05, "loss": 0.0283, "step": 32330 }, { "epoch": 23.316510454217735, "grad_norm": 0.16292032599449158, "learning_rate": 3.0403931766032135e-05, "loss": 0.025, "step": 32340 }, { "epoch": 23.323720259552992, "grad_norm": 0.1464499682188034, "learning_rate": 3.0373512251942814e-05, "loss": 0.0289, "step": 32350 }, { "epoch": 23.330930064888246, "grad_norm": 0.1483205258846283, "learning_rate": 3.0343101323146007e-05, "loss": 0.0295, "step": 32360 }, { "epoch": 23.338139870223504, "grad_norm": 0.09149190783500671, "learning_rate": 3.0312698992944523e-05, "loss": 0.0261, "step": 32370 }, { "epoch": 23.34534967555876, "grad_norm": 0.09867040812969208, "learning_rate": 3.028230527463736e-05, "loss": 0.0309, "step": 32380 }, { "epoch": 23.352559480894016, "grad_norm": 0.15926948189735413, "learning_rate": 3.0251920181519746e-05, "loss": 0.0283, "step": 32390 }, { "epoch": 23.359769286229273, "grad_norm": 0.1605098843574524, "learning_rate": 3.0221543726883183e-05, "loss": 0.0308, "step": 32400 }, { "epoch": 23.366979091564527, "grad_norm": 0.08780892193317413, "learning_rate": 3.019117592401535e-05, "loss": 0.0273, "step": 32410 }, { "epoch": 23.374188896899785, "grad_norm": 0.14798830449581146, "learning_rate": 3.0160816786200137e-05, "loss": 0.03, "step": 32420 }, { "epoch": 23.38139870223504, "grad_norm": 0.17579220235347748, "learning_rate": 3.01304663267177e-05, "loss": 0.0276, "step": 32430 }, { "epoch": 23.388608507570297, "grad_norm": 0.14011150598526, "learning_rate": 3.0100124558844338e-05, "loss": 0.0282, "step": 32440 }, { "epoch": 23.39581831290555, "grad_norm": 0.1759842485189438, "learning_rate": 3.0069791495852562e-05, "loss": 0.0272, "step": 32450 }, { "epoch": 23.403028118240808, "grad_norm": 0.16463229060173035, "learning_rate": 3.0039467151011113e-05, "loss": 0.0267, "step": 32460 }, { "epoch": 23.410237923576062, "grad_norm": 0.11470725387334824, "learning_rate": 3.000915153758487e-05, "loss": 0.027, "step": 32470 }, { "epoch": 23.41744772891132, "grad_norm": 0.14065644145011902, "learning_rate": 2.9978844668834917e-05, "loss": 0.0257, "step": 32480 }, { "epoch": 23.424657534246574, "grad_norm": 0.17693279683589935, "learning_rate": 2.994854655801849e-05, "loss": 0.0232, "step": 32490 }, { "epoch": 23.43186733958183, "grad_norm": 0.1307685524225235, "learning_rate": 2.991825721838905e-05, "loss": 0.0239, "step": 32500 }, { "epoch": 23.439077144917086, "grad_norm": 0.19572852551937103, "learning_rate": 2.9887976663196166e-05, "loss": 0.03, "step": 32510 }, { "epoch": 23.446286950252343, "grad_norm": 0.09660615026950836, "learning_rate": 2.985770490568556e-05, "loss": 0.0263, "step": 32520 }, { "epoch": 23.4534967555876, "grad_norm": 0.11740533262491226, "learning_rate": 2.9827441959099157e-05, "loss": 0.0336, "step": 32530 }, { "epoch": 23.460706560922855, "grad_norm": 0.19789820909500122, "learning_rate": 2.979718783667499e-05, "loss": 0.0251, "step": 32540 }, { "epoch": 23.467916366258113, "grad_norm": 0.14639762043952942, "learning_rate": 2.9766942551647215e-05, "loss": 0.0307, "step": 32550 }, { "epoch": 23.475126171593367, "grad_norm": 0.1501837521791458, "learning_rate": 2.9736706117246188e-05, "loss": 0.0308, "step": 32560 }, { "epoch": 23.482335976928624, "grad_norm": 0.14463596045970917, "learning_rate": 2.9706478546698324e-05, "loss": 0.028, "step": 32570 }, { "epoch": 23.48954578226388, "grad_norm": 0.15819823741912842, "learning_rate": 2.9676259853226195e-05, "loss": 0.0258, "step": 32580 }, { "epoch": 23.496755587599136, "grad_norm": 0.13960868120193481, "learning_rate": 2.964605005004849e-05, "loss": 0.033, "step": 32590 }, { "epoch": 23.50396539293439, "grad_norm": 0.13076946139335632, "learning_rate": 2.9615849150380016e-05, "loss": 0.0302, "step": 32600 }, { "epoch": 23.511175198269648, "grad_norm": 0.14892873167991638, "learning_rate": 2.9585657167431637e-05, "loss": 0.032, "step": 32610 }, { "epoch": 23.5183850036049, "grad_norm": 0.18644998967647552, "learning_rate": 2.9555474114410388e-05, "loss": 0.0242, "step": 32620 }, { "epoch": 23.52559480894016, "grad_norm": 0.2350740283727646, "learning_rate": 2.9525300004519348e-05, "loss": 0.0268, "step": 32630 }, { "epoch": 23.532804614275413, "grad_norm": 0.12440177798271179, "learning_rate": 2.9495134850957696e-05, "loss": 0.0255, "step": 32640 }, { "epoch": 23.54001441961067, "grad_norm": 0.11467932164669037, "learning_rate": 2.9464978666920674e-05, "loss": 0.0246, "step": 32650 }, { "epoch": 23.547224224945925, "grad_norm": 0.11993034929037094, "learning_rate": 2.9434831465599662e-05, "loss": 0.0302, "step": 32660 }, { "epoch": 23.554434030281183, "grad_norm": 0.10395566374063492, "learning_rate": 2.9404693260182047e-05, "loss": 0.0317, "step": 32670 }, { "epoch": 23.561643835616437, "grad_norm": 0.13552352786064148, "learning_rate": 2.9374564063851295e-05, "loss": 0.0268, "step": 32680 }, { "epoch": 23.568853640951694, "grad_norm": 0.1356576830148697, "learning_rate": 2.9344443889786967e-05, "loss": 0.0301, "step": 32690 }, { "epoch": 23.576063446286952, "grad_norm": 0.16853341460227966, "learning_rate": 2.931433275116463e-05, "loss": 0.0261, "step": 32700 }, { "epoch": 23.583273251622206, "grad_norm": 0.13683049380779266, "learning_rate": 2.928423066115592e-05, "loss": 0.0256, "step": 32710 }, { "epoch": 23.590483056957464, "grad_norm": 0.09141821414232254, "learning_rate": 2.9254137632928537e-05, "loss": 0.0271, "step": 32720 }, { "epoch": 23.597692862292718, "grad_norm": 0.16540101170539856, "learning_rate": 2.9224053679646167e-05, "loss": 0.0298, "step": 32730 }, { "epoch": 23.604902667627975, "grad_norm": 0.12356438487768173, "learning_rate": 2.9193978814468547e-05, "loss": 0.0253, "step": 32740 }, { "epoch": 23.61211247296323, "grad_norm": 0.14836861193180084, "learning_rate": 2.9163913050551485e-05, "loss": 0.0262, "step": 32750 }, { "epoch": 23.619322278298487, "grad_norm": 0.1320163905620575, "learning_rate": 2.9133856401046735e-05, "loss": 0.0299, "step": 32760 }, { "epoch": 23.62653208363374, "grad_norm": 0.1638461798429489, "learning_rate": 2.910380887910209e-05, "loss": 0.0238, "step": 32770 }, { "epoch": 23.633741888969, "grad_norm": 0.21028175950050354, "learning_rate": 2.9073770497861386e-05, "loss": 0.0295, "step": 32780 }, { "epoch": 23.640951694304253, "grad_norm": 0.1784667819738388, "learning_rate": 2.904374127046442e-05, "loss": 0.0294, "step": 32790 }, { "epoch": 23.64816149963951, "grad_norm": 0.11318258196115494, "learning_rate": 2.9013721210046973e-05, "loss": 0.0267, "step": 32800 }, { "epoch": 23.655371304974764, "grad_norm": 0.14543558657169342, "learning_rate": 2.8983710329740882e-05, "loss": 0.0243, "step": 32810 }, { "epoch": 23.662581110310022, "grad_norm": 0.1500001847743988, "learning_rate": 2.8953708642673898e-05, "loss": 0.0322, "step": 32820 }, { "epoch": 23.669790915645276, "grad_norm": 0.11183024942874908, "learning_rate": 2.8923716161969782e-05, "loss": 0.0252, "step": 32830 }, { "epoch": 23.677000720980534, "grad_norm": 0.1367800086736679, "learning_rate": 2.8893732900748248e-05, "loss": 0.0257, "step": 32840 }, { "epoch": 23.68421052631579, "grad_norm": 0.14261168241500854, "learning_rate": 2.8863758872125036e-05, "loss": 0.0243, "step": 32850 }, { "epoch": 23.691420331651045, "grad_norm": 0.14135576784610748, "learning_rate": 2.883379408921175e-05, "loss": 0.0282, "step": 32860 }, { "epoch": 23.698630136986303, "grad_norm": 0.14720727503299713, "learning_rate": 2.880383856511606e-05, "loss": 0.0247, "step": 32870 }, { "epoch": 23.705839942321557, "grad_norm": 0.11920975893735886, "learning_rate": 2.8773892312941476e-05, "loss": 0.0274, "step": 32880 }, { "epoch": 23.713049747656815, "grad_norm": 0.12428727746009827, "learning_rate": 2.8743955345787555e-05, "loss": 0.0233, "step": 32890 }, { "epoch": 23.72025955299207, "grad_norm": 0.14586840569972992, "learning_rate": 2.87140276767497e-05, "loss": 0.0279, "step": 32900 }, { "epoch": 23.727469358327326, "grad_norm": 0.10960683971643448, "learning_rate": 2.868410931891934e-05, "loss": 0.0318, "step": 32910 }, { "epoch": 23.73467916366258, "grad_norm": 0.12480223923921585, "learning_rate": 2.8654200285383748e-05, "loss": 0.0279, "step": 32920 }, { "epoch": 23.741888968997838, "grad_norm": 0.11934978514909744, "learning_rate": 2.8624300589226126e-05, "loss": 0.0236, "step": 32930 }, { "epoch": 23.749098774333092, "grad_norm": 0.1499069780111313, "learning_rate": 2.859441024352568e-05, "loss": 0.0223, "step": 32940 }, { "epoch": 23.75630857966835, "grad_norm": 0.195369154214859, "learning_rate": 2.8564529261357427e-05, "loss": 0.0258, "step": 32950 }, { "epoch": 23.763518385003604, "grad_norm": 0.11893408745527267, "learning_rate": 2.8534657655792307e-05, "loss": 0.0261, "step": 32960 }, { "epoch": 23.77072819033886, "grad_norm": 0.1098656877875328, "learning_rate": 2.8504795439897208e-05, "loss": 0.0333, "step": 32970 }, { "epoch": 23.777937995674115, "grad_norm": 0.14424929022789001, "learning_rate": 2.847494262673487e-05, "loss": 0.0276, "step": 32980 }, { "epoch": 23.785147801009373, "grad_norm": 0.1530589759349823, "learning_rate": 2.8445099229363925e-05, "loss": 0.0261, "step": 32990 }, { "epoch": 23.79235760634463, "grad_norm": 0.18474814295768738, "learning_rate": 2.841526526083886e-05, "loss": 0.0264, "step": 33000 }, { "epoch": 23.799567411679885, "grad_norm": 0.16382858157157898, "learning_rate": 2.838544073421011e-05, "loss": 0.0214, "step": 33010 }, { "epoch": 23.806777217015142, "grad_norm": 0.1682967096567154, "learning_rate": 2.8355625662523928e-05, "loss": 0.0273, "step": 33020 }, { "epoch": 23.813987022350396, "grad_norm": 0.1429983675479889, "learning_rate": 2.8325820058822407e-05, "loss": 0.0243, "step": 33030 }, { "epoch": 23.821196827685654, "grad_norm": 0.1627105325460434, "learning_rate": 2.829602393614357e-05, "loss": 0.0271, "step": 33040 }, { "epoch": 23.828406633020908, "grad_norm": 0.17026068270206451, "learning_rate": 2.8266237307521236e-05, "loss": 0.0251, "step": 33050 }, { "epoch": 23.835616438356166, "grad_norm": 0.102246955037117, "learning_rate": 2.8236460185985074e-05, "loss": 0.023, "step": 33060 }, { "epoch": 23.84282624369142, "grad_norm": 0.24003244936466217, "learning_rate": 2.8206692584560646e-05, "loss": 0.0284, "step": 33070 }, { "epoch": 23.850036049026677, "grad_norm": 0.14209096133708954, "learning_rate": 2.8176934516269283e-05, "loss": 0.0242, "step": 33080 }, { "epoch": 23.85724585436193, "grad_norm": 0.1234656274318695, "learning_rate": 2.8147185994128167e-05, "loss": 0.0271, "step": 33090 }, { "epoch": 23.86445565969719, "grad_norm": 0.13380497694015503, "learning_rate": 2.8117447031150346e-05, "loss": 0.0233, "step": 33100 }, { "epoch": 23.871665465032443, "grad_norm": 0.14136242866516113, "learning_rate": 2.8087717640344618e-05, "loss": 0.0301, "step": 33110 }, { "epoch": 23.8788752703677, "grad_norm": 0.14291560649871826, "learning_rate": 2.805799783471566e-05, "loss": 0.0277, "step": 33120 }, { "epoch": 23.886085075702955, "grad_norm": 0.16222265362739563, "learning_rate": 2.8028287627263895e-05, "loss": 0.0283, "step": 33130 }, { "epoch": 23.893294881038212, "grad_norm": 0.162430077791214, "learning_rate": 2.7998587030985608e-05, "loss": 0.0237, "step": 33140 }, { "epoch": 23.900504686373466, "grad_norm": 0.18560771644115448, "learning_rate": 2.7968896058872806e-05, "loss": 0.03, "step": 33150 }, { "epoch": 23.907714491708724, "grad_norm": 0.15107101202011108, "learning_rate": 2.7939214723913388e-05, "loss": 0.0335, "step": 33160 }, { "epoch": 23.91492429704398, "grad_norm": 0.1231297180056572, "learning_rate": 2.7909543039090935e-05, "loss": 0.0258, "step": 33170 }, { "epoch": 23.922134102379236, "grad_norm": 0.1931617707014084, "learning_rate": 2.787988101738487e-05, "loss": 0.0253, "step": 33180 }, { "epoch": 23.929343907714493, "grad_norm": 0.12216729670763016, "learning_rate": 2.7850228671770334e-05, "loss": 0.0266, "step": 33190 }, { "epoch": 23.936553713049747, "grad_norm": 0.1916608214378357, "learning_rate": 2.782058601521832e-05, "loss": 0.0281, "step": 33200 }, { "epoch": 23.943763518385005, "grad_norm": 0.16253520548343658, "learning_rate": 2.77909530606955e-05, "loss": 0.0344, "step": 33210 }, { "epoch": 23.95097332372026, "grad_norm": 0.11123587936162949, "learning_rate": 2.7761329821164335e-05, "loss": 0.027, "step": 33220 }, { "epoch": 23.958183129055517, "grad_norm": 0.12859013676643372, "learning_rate": 2.7731716309583062e-05, "loss": 0.0264, "step": 33230 }, { "epoch": 23.96539293439077, "grad_norm": 0.15468734502792358, "learning_rate": 2.7702112538905624e-05, "loss": 0.0243, "step": 33240 }, { "epoch": 23.972602739726028, "grad_norm": 0.13024544715881348, "learning_rate": 2.7672518522081682e-05, "loss": 0.0265, "step": 33250 }, { "epoch": 23.979812545061282, "grad_norm": 0.1378520429134369, "learning_rate": 2.7642934272056726e-05, "loss": 0.025, "step": 33260 }, { "epoch": 23.98702235039654, "grad_norm": 0.13960951566696167, "learning_rate": 2.7613359801771877e-05, "loss": 0.0292, "step": 33270 }, { "epoch": 23.994232155731794, "grad_norm": 0.116239532828331, "learning_rate": 2.7583795124163996e-05, "loss": 0.0198, "step": 33280 }, { "epoch": 24.00144196106705, "grad_norm": 0.15009862184524536, "learning_rate": 2.755424025216572e-05, "loss": 0.0313, "step": 33290 }, { "epoch": 24.008651766402306, "grad_norm": 0.11256299167871475, "learning_rate": 2.7524695198705334e-05, "loss": 0.0242, "step": 33300 }, { "epoch": 24.015861571737563, "grad_norm": 0.15094652771949768, "learning_rate": 2.749515997670683e-05, "loss": 0.0272, "step": 33310 }, { "epoch": 24.023071377072817, "grad_norm": 0.11831463873386383, "learning_rate": 2.746563459908995e-05, "loss": 0.0268, "step": 33320 }, { "epoch": 24.030281182408075, "grad_norm": 0.1189243420958519, "learning_rate": 2.7436119078770085e-05, "loss": 0.0268, "step": 33330 }, { "epoch": 24.037490987743332, "grad_norm": 0.1662106066942215, "learning_rate": 2.74066134286583e-05, "loss": 0.0292, "step": 33340 }, { "epoch": 24.044700793078587, "grad_norm": 0.203890860080719, "learning_rate": 2.7377117661661412e-05, "loss": 0.0302, "step": 33350 }, { "epoch": 24.051910598413844, "grad_norm": 0.13164561986923218, "learning_rate": 2.734763179068183e-05, "loss": 0.026, "step": 33360 }, { "epoch": 24.059120403749098, "grad_norm": 0.15246348083019257, "learning_rate": 2.7318155828617718e-05, "loss": 0.0259, "step": 33370 }, { "epoch": 24.066330209084356, "grad_norm": 0.12005532532930374, "learning_rate": 2.7288689788362832e-05, "loss": 0.0262, "step": 33380 }, { "epoch": 24.07354001441961, "grad_norm": 0.11185519397258759, "learning_rate": 2.725923368280664e-05, "loss": 0.0328, "step": 33390 }, { "epoch": 24.080749819754868, "grad_norm": 0.15540865063667297, "learning_rate": 2.7229787524834237e-05, "loss": 0.0308, "step": 33400 }, { "epoch": 24.08795962509012, "grad_norm": 0.17399215698242188, "learning_rate": 2.7200351327326345e-05, "loss": 0.0244, "step": 33410 }, { "epoch": 24.09516943042538, "grad_norm": 0.1404261440038681, "learning_rate": 2.7170925103159407e-05, "loss": 0.0243, "step": 33420 }, { "epoch": 24.102379235760633, "grad_norm": 0.1212763786315918, "learning_rate": 2.714150886520542e-05, "loss": 0.0218, "step": 33430 }, { "epoch": 24.10958904109589, "grad_norm": 0.15398114919662476, "learning_rate": 2.7112102626332038e-05, "loss": 0.0245, "step": 33440 }, { "epoch": 24.116798846431145, "grad_norm": 0.12757626175880432, "learning_rate": 2.7082706399402574e-05, "loss": 0.026, "step": 33450 }, { "epoch": 24.124008651766403, "grad_norm": 0.14920346438884735, "learning_rate": 2.7053320197275934e-05, "loss": 0.0231, "step": 33460 }, { "epoch": 24.131218457101657, "grad_norm": 0.14792205393314362, "learning_rate": 2.7023944032806614e-05, "loss": 0.0277, "step": 33470 }, { "epoch": 24.138428262436914, "grad_norm": 0.16664312779903412, "learning_rate": 2.699457791884478e-05, "loss": 0.0263, "step": 33480 }, { "epoch": 24.145638067772172, "grad_norm": 0.19276289641857147, "learning_rate": 2.6965221868236155e-05, "loss": 0.0244, "step": 33490 }, { "epoch": 24.152847873107426, "grad_norm": 0.18991269171237946, "learning_rate": 2.6935875893822077e-05, "loss": 0.0259, "step": 33500 }, { "epoch": 24.160057678442683, "grad_norm": 0.1884521245956421, "learning_rate": 2.690654000843944e-05, "loss": 0.0298, "step": 33510 }, { "epoch": 24.167267483777938, "grad_norm": 0.11740035563707352, "learning_rate": 2.687721422492081e-05, "loss": 0.027, "step": 33520 }, { "epoch": 24.174477289113195, "grad_norm": 0.13978014886379242, "learning_rate": 2.6847898556094252e-05, "loss": 0.0289, "step": 33530 }, { "epoch": 24.18168709444845, "grad_norm": 0.16638687252998352, "learning_rate": 2.6818593014783417e-05, "loss": 0.0265, "step": 33540 }, { "epoch": 24.188896899783707, "grad_norm": 0.1612180471420288, "learning_rate": 2.6789297613807585e-05, "loss": 0.0236, "step": 33550 }, { "epoch": 24.19610670511896, "grad_norm": 0.12061967700719833, "learning_rate": 2.676001236598154e-05, "loss": 0.0298, "step": 33560 }, { "epoch": 24.20331651045422, "grad_norm": 0.1429944783449173, "learning_rate": 2.6730737284115626e-05, "loss": 0.0305, "step": 33570 }, { "epoch": 24.210526315789473, "grad_norm": 0.18191392719745636, "learning_rate": 2.670147238101579e-05, "loss": 0.0243, "step": 33580 }, { "epoch": 24.21773612112473, "grad_norm": 0.1563529372215271, "learning_rate": 2.6672217669483473e-05, "loss": 0.0301, "step": 33590 }, { "epoch": 24.224945926459984, "grad_norm": 0.14457228779792786, "learning_rate": 2.6642973162315706e-05, "loss": 0.0293, "step": 33600 }, { "epoch": 24.232155731795242, "grad_norm": 0.20048902928829193, "learning_rate": 2.6613738872304988e-05, "loss": 0.0355, "step": 33610 }, { "epoch": 24.239365537130496, "grad_norm": 0.13384556770324707, "learning_rate": 2.6584514812239437e-05, "loss": 0.0255, "step": 33620 }, { "epoch": 24.246575342465754, "grad_norm": 0.15705181658267975, "learning_rate": 2.655530099490261e-05, "loss": 0.0243, "step": 33630 }, { "epoch": 24.25378514780101, "grad_norm": 0.11385642737150192, "learning_rate": 2.6526097433073653e-05, "loss": 0.0288, "step": 33640 }, { "epoch": 24.260994953136265, "grad_norm": 0.16369934380054474, "learning_rate": 2.649690413952719e-05, "loss": 0.0238, "step": 33650 }, { "epoch": 24.268204758471523, "grad_norm": 0.15842872858047485, "learning_rate": 2.646772112703334e-05, "loss": 0.0268, "step": 33660 }, { "epoch": 24.275414563806777, "grad_norm": 0.11727274954319, "learning_rate": 2.6438548408357768e-05, "loss": 0.0272, "step": 33670 }, { "epoch": 24.282624369142034, "grad_norm": 0.15105284750461578, "learning_rate": 2.6409385996261603e-05, "loss": 0.027, "step": 33680 }, { "epoch": 24.28983417447729, "grad_norm": 0.19454044103622437, "learning_rate": 2.638023390350148e-05, "loss": 0.0252, "step": 33690 }, { "epoch": 24.297043979812546, "grad_norm": 0.07899586111307144, "learning_rate": 2.635109214282948e-05, "loss": 0.0205, "step": 33700 }, { "epoch": 24.3042537851478, "grad_norm": 0.07854095101356506, "learning_rate": 2.6321960726993254e-05, "loss": 0.0272, "step": 33710 }, { "epoch": 24.311463590483058, "grad_norm": 0.13706673681735992, "learning_rate": 2.6292839668735846e-05, "loss": 0.0264, "step": 33720 }, { "epoch": 24.318673395818312, "grad_norm": 0.12528321146965027, "learning_rate": 2.6263728980795778e-05, "loss": 0.0248, "step": 33730 }, { "epoch": 24.32588320115357, "grad_norm": 0.13326480984687805, "learning_rate": 2.6234628675907092e-05, "loss": 0.0265, "step": 33740 }, { "epoch": 24.333093006488824, "grad_norm": 0.1421353965997696, "learning_rate": 2.620553876679923e-05, "loss": 0.0248, "step": 33750 }, { "epoch": 24.34030281182408, "grad_norm": 0.11422103643417358, "learning_rate": 2.617645926619709e-05, "loss": 0.0242, "step": 33760 }, { "epoch": 24.347512617159335, "grad_norm": 0.15724283456802368, "learning_rate": 2.6147390186821064e-05, "loss": 0.029, "step": 33770 }, { "epoch": 24.354722422494593, "grad_norm": 0.08238894492387772, "learning_rate": 2.6118331541386955e-05, "loss": 0.0253, "step": 33780 }, { "epoch": 24.361932227829847, "grad_norm": 0.10734716802835464, "learning_rate": 2.6089283342605953e-05, "loss": 0.0293, "step": 33790 }, { "epoch": 24.369142033165105, "grad_norm": 0.14454713463783264, "learning_rate": 2.6060245603184797e-05, "loss": 0.0275, "step": 33800 }, { "epoch": 24.376351838500362, "grad_norm": 0.12944623827934265, "learning_rate": 2.6031218335825536e-05, "loss": 0.0261, "step": 33810 }, { "epoch": 24.383561643835616, "grad_norm": 0.1760254055261612, "learning_rate": 2.600220155322568e-05, "loss": 0.0301, "step": 33820 }, { "epoch": 24.390771449170874, "grad_norm": 0.12950395047664642, "learning_rate": 2.5973195268078186e-05, "loss": 0.0285, "step": 33830 }, { "epoch": 24.397981254506128, "grad_norm": 0.09420006722211838, "learning_rate": 2.5944199493071352e-05, "loss": 0.0208, "step": 33840 }, { "epoch": 24.405191059841385, "grad_norm": 0.16687190532684326, "learning_rate": 2.591521424088895e-05, "loss": 0.0304, "step": 33850 }, { "epoch": 24.41240086517664, "grad_norm": 0.16196854412555695, "learning_rate": 2.5886239524210086e-05, "loss": 0.0277, "step": 33860 }, { "epoch": 24.419610670511897, "grad_norm": 0.1389644891023636, "learning_rate": 2.5857275355709317e-05, "loss": 0.023, "step": 33870 }, { "epoch": 24.42682047584715, "grad_norm": 0.19463777542114258, "learning_rate": 2.5828321748056533e-05, "loss": 0.0252, "step": 33880 }, { "epoch": 24.43403028118241, "grad_norm": 0.09791234880685806, "learning_rate": 2.5799378713917012e-05, "loss": 0.0225, "step": 33890 }, { "epoch": 24.441240086517663, "grad_norm": 0.1421855390071869, "learning_rate": 2.5770446265951455e-05, "loss": 0.0249, "step": 33900 }, { "epoch": 24.44844989185292, "grad_norm": 0.17004826664924622, "learning_rate": 2.5741524416815877e-05, "loss": 0.0234, "step": 33910 }, { "epoch": 24.455659697188175, "grad_norm": 0.12197312712669373, "learning_rate": 2.571261317916166e-05, "loss": 0.0252, "step": 33920 }, { "epoch": 24.462869502523432, "grad_norm": 0.17416119575500488, "learning_rate": 2.5683712565635597e-05, "loss": 0.0274, "step": 33930 }, { "epoch": 24.470079307858686, "grad_norm": 0.17985819280147552, "learning_rate": 2.565482258887979e-05, "loss": 0.0246, "step": 33940 }, { "epoch": 24.477289113193944, "grad_norm": 0.12237819284200668, "learning_rate": 2.5625943261531664e-05, "loss": 0.0248, "step": 33950 }, { "epoch": 24.484498918529198, "grad_norm": 0.16277675330638885, "learning_rate": 2.5597074596224067e-05, "loss": 0.0259, "step": 33960 }, { "epoch": 24.491708723864456, "grad_norm": 0.1402057707309723, "learning_rate": 2.5568216605585105e-05, "loss": 0.025, "step": 33970 }, { "epoch": 24.498918529199713, "grad_norm": 0.1330382376909256, "learning_rate": 2.5539369302238235e-05, "loss": 0.0288, "step": 33980 }, { "epoch": 24.506128334534967, "grad_norm": 0.1605004072189331, "learning_rate": 2.5510532698802276e-05, "loss": 0.0262, "step": 33990 }, { "epoch": 24.513338139870225, "grad_norm": 0.08725405484437943, "learning_rate": 2.5481706807891324e-05, "loss": 0.026, "step": 34000 }, { "epoch": 24.52054794520548, "grad_norm": 0.16785135865211487, "learning_rate": 2.5452891642114783e-05, "loss": 0.0311, "step": 34010 }, { "epoch": 24.527757750540736, "grad_norm": 0.16908182203769684, "learning_rate": 2.542408721407742e-05, "loss": 0.0279, "step": 34020 }, { "epoch": 24.53496755587599, "grad_norm": 0.11515950411558151, "learning_rate": 2.539529353637926e-05, "loss": 0.0226, "step": 34030 }, { "epoch": 24.542177361211248, "grad_norm": 0.14974214136600494, "learning_rate": 2.5366510621615626e-05, "loss": 0.0312, "step": 34040 }, { "epoch": 24.549387166546502, "grad_norm": 0.11265935003757477, "learning_rate": 2.5337738482377132e-05, "loss": 0.0198, "step": 34050 }, { "epoch": 24.55659697188176, "grad_norm": 0.2348363697528839, "learning_rate": 2.5308977131249722e-05, "loss": 0.0295, "step": 34060 }, { "epoch": 24.563806777217014, "grad_norm": 0.10532645881175995, "learning_rate": 2.5280226580814554e-05, "loss": 0.0214, "step": 34070 }, { "epoch": 24.57101658255227, "grad_norm": 0.13241682946681976, "learning_rate": 2.525148684364813e-05, "loss": 0.0268, "step": 34080 }, { "epoch": 24.578226387887526, "grad_norm": 0.14638328552246094, "learning_rate": 2.522275793232215e-05, "loss": 0.027, "step": 34090 }, { "epoch": 24.585436193222783, "grad_norm": 0.12863607704639435, "learning_rate": 2.519403985940365e-05, "loss": 0.023, "step": 34100 }, { "epoch": 24.592645998558037, "grad_norm": 0.1354246586561203, "learning_rate": 2.5165332637454863e-05, "loss": 0.0242, "step": 34110 }, { "epoch": 24.599855803893295, "grad_norm": 0.140843003988266, "learning_rate": 2.513663627903334e-05, "loss": 0.0303, "step": 34120 }, { "epoch": 24.607065609228552, "grad_norm": 0.11474812775850296, "learning_rate": 2.5107950796691815e-05, "loss": 0.0272, "step": 34130 }, { "epoch": 24.614275414563807, "grad_norm": 0.1521645337343216, "learning_rate": 2.5079276202978276e-05, "loss": 0.0219, "step": 34140 }, { "epoch": 24.621485219899064, "grad_norm": 0.1082567349076271, "learning_rate": 2.505061251043601e-05, "loss": 0.0234, "step": 34150 }, { "epoch": 24.628695025234318, "grad_norm": 0.17644865810871124, "learning_rate": 2.502195973160346e-05, "loss": 0.0319, "step": 34160 }, { "epoch": 24.635904830569576, "grad_norm": 0.16833510994911194, "learning_rate": 2.4993317879014316e-05, "loss": 0.0283, "step": 34170 }, { "epoch": 24.64311463590483, "grad_norm": 0.14983926713466644, "learning_rate": 2.4964686965197526e-05, "loss": 0.0303, "step": 34180 }, { "epoch": 24.650324441240087, "grad_norm": 0.1355542540550232, "learning_rate": 2.4936067002677217e-05, "loss": 0.0261, "step": 34190 }, { "epoch": 24.65753424657534, "grad_norm": 0.11984842270612717, "learning_rate": 2.4907458003972724e-05, "loss": 0.0263, "step": 34200 }, { "epoch": 24.6647440519106, "grad_norm": 0.10779184103012085, "learning_rate": 2.4878859981598572e-05, "loss": 0.023, "step": 34210 }, { "epoch": 24.671953857245853, "grad_norm": 0.24280939996242523, "learning_rate": 2.4850272948064555e-05, "loss": 0.0318, "step": 34220 }, { "epoch": 24.67916366258111, "grad_norm": 0.1256897896528244, "learning_rate": 2.4821696915875596e-05, "loss": 0.035, "step": 34230 }, { "epoch": 24.686373467916365, "grad_norm": 0.14071890711784363, "learning_rate": 2.4793131897531798e-05, "loss": 0.0228, "step": 34240 }, { "epoch": 24.693583273251623, "grad_norm": 0.1257564276456833, "learning_rate": 2.47645779055285e-05, "loss": 0.022, "step": 34250 }, { "epoch": 24.700793078586877, "grad_norm": 0.1439196914434433, "learning_rate": 2.4736034952356186e-05, "loss": 0.0286, "step": 34260 }, { "epoch": 24.708002883922134, "grad_norm": 0.2318868637084961, "learning_rate": 2.470750305050048e-05, "loss": 0.0257, "step": 34270 }, { "epoch": 24.715212689257392, "grad_norm": 0.12913808226585388, "learning_rate": 2.4678982212442253e-05, "loss": 0.0256, "step": 34280 }, { "epoch": 24.722422494592646, "grad_norm": 0.16281519830226898, "learning_rate": 2.4650472450657463e-05, "loss": 0.0268, "step": 34290 }, { "epoch": 24.729632299927903, "grad_norm": 0.21022634208202362, "learning_rate": 2.4621973777617237e-05, "loss": 0.0286, "step": 34300 }, { "epoch": 24.736842105263158, "grad_norm": 0.1321067363023758, "learning_rate": 2.459348620578789e-05, "loss": 0.0263, "step": 34310 }, { "epoch": 24.744051910598415, "grad_norm": 0.13584178686141968, "learning_rate": 2.456500974763083e-05, "loss": 0.0247, "step": 34320 }, { "epoch": 24.75126171593367, "grad_norm": 0.18090444803237915, "learning_rate": 2.4536544415602653e-05, "loss": 0.0284, "step": 34330 }, { "epoch": 24.758471521268927, "grad_norm": 0.18019694089889526, "learning_rate": 2.4508090222155027e-05, "loss": 0.025, "step": 34340 }, { "epoch": 24.76568132660418, "grad_norm": 0.10168202221393585, "learning_rate": 2.4479647179734822e-05, "loss": 0.0256, "step": 34350 }, { "epoch": 24.77289113193944, "grad_norm": 0.1449117809534073, "learning_rate": 2.4451215300783953e-05, "loss": 0.0281, "step": 34360 }, { "epoch": 24.780100937274693, "grad_norm": 0.1776004284620285, "learning_rate": 2.442279459773953e-05, "loss": 0.0326, "step": 34370 }, { "epoch": 24.78731074260995, "grad_norm": 0.14692926406860352, "learning_rate": 2.4394385083033712e-05, "loss": 0.0263, "step": 34380 }, { "epoch": 24.794520547945204, "grad_norm": 0.17035840451717377, "learning_rate": 2.4365986769093792e-05, "loss": 0.0245, "step": 34390 }, { "epoch": 24.801730353280462, "grad_norm": 0.10739351809024811, "learning_rate": 2.4337599668342127e-05, "loss": 0.0287, "step": 34400 }, { "epoch": 24.808940158615716, "grad_norm": 0.14491263031959534, "learning_rate": 2.4309223793196245e-05, "loss": 0.024, "step": 34410 }, { "epoch": 24.816149963950974, "grad_norm": 0.1135808676481247, "learning_rate": 2.4280859156068698e-05, "loss": 0.0249, "step": 34420 }, { "epoch": 24.823359769286228, "grad_norm": 0.1805468052625656, "learning_rate": 2.4252505769367123e-05, "loss": 0.0301, "step": 34430 }, { "epoch": 24.830569574621485, "grad_norm": 0.12088008970022202, "learning_rate": 2.422416364549429e-05, "loss": 0.0268, "step": 34440 }, { "epoch": 24.837779379956743, "grad_norm": 0.1525232493877411, "learning_rate": 2.4195832796847994e-05, "loss": 0.0288, "step": 34450 }, { "epoch": 24.844989185291997, "grad_norm": 0.09080122411251068, "learning_rate": 2.416751323582109e-05, "loss": 0.0267, "step": 34460 }, { "epoch": 24.852198990627254, "grad_norm": 0.1249062642455101, "learning_rate": 2.4139204974801544e-05, "loss": 0.0298, "step": 34470 }, { "epoch": 24.85940879596251, "grad_norm": 0.12522907555103302, "learning_rate": 2.4110908026172347e-05, "loss": 0.0274, "step": 34480 }, { "epoch": 24.866618601297766, "grad_norm": 0.19463758170604706, "learning_rate": 2.4082622402311518e-05, "loss": 0.0244, "step": 34490 }, { "epoch": 24.87382840663302, "grad_norm": 0.1522488296031952, "learning_rate": 2.4054348115592183e-05, "loss": 0.0265, "step": 34500 }, { "epoch": 24.881038211968278, "grad_norm": 0.1867920309305191, "learning_rate": 2.4026085178382475e-05, "loss": 0.0307, "step": 34510 }, { "epoch": 24.888248017303532, "grad_norm": 0.16654713451862335, "learning_rate": 2.3997833603045523e-05, "loss": 0.0259, "step": 34520 }, { "epoch": 24.89545782263879, "grad_norm": 0.17303793132305145, "learning_rate": 2.396959340193958e-05, "loss": 0.0234, "step": 34530 }, { "epoch": 24.902667627974044, "grad_norm": 0.1311596781015396, "learning_rate": 2.3941364587417836e-05, "loss": 0.0299, "step": 34540 }, { "epoch": 24.9098774333093, "grad_norm": 0.13117468357086182, "learning_rate": 2.3913147171828526e-05, "loss": 0.0236, "step": 34550 }, { "epoch": 24.917087238644555, "grad_norm": 0.17301109433174133, "learning_rate": 2.388494116751494e-05, "loss": 0.0267, "step": 34560 }, { "epoch": 24.924297043979813, "grad_norm": 0.17904691398143768, "learning_rate": 2.3856746586815302e-05, "loss": 0.0289, "step": 34570 }, { "epoch": 24.931506849315067, "grad_norm": 0.11756716668605804, "learning_rate": 2.3828563442062924e-05, "loss": 0.0237, "step": 34580 }, { "epoch": 24.938716654650325, "grad_norm": 0.15197592973709106, "learning_rate": 2.3800391745586026e-05, "loss": 0.0279, "step": 34590 }, { "epoch": 24.94592645998558, "grad_norm": 0.18286024034023285, "learning_rate": 2.377223150970791e-05, "loss": 0.0232, "step": 34600 }, { "epoch": 24.953136265320836, "grad_norm": 0.12570300698280334, "learning_rate": 2.3744082746746793e-05, "loss": 0.0309, "step": 34610 }, { "epoch": 24.960346070656094, "grad_norm": 0.14862915873527527, "learning_rate": 2.3715945469015893e-05, "loss": 0.0235, "step": 34620 }, { "epoch": 24.967555875991348, "grad_norm": 0.15821819007396698, "learning_rate": 2.368781968882343e-05, "loss": 0.0248, "step": 34630 }, { "epoch": 24.974765681326605, "grad_norm": 0.1585134118795395, "learning_rate": 2.365970541847258e-05, "loss": 0.0338, "step": 34640 }, { "epoch": 24.98197548666186, "grad_norm": 0.14288018643856049, "learning_rate": 2.3631602670261445e-05, "loss": 0.0258, "step": 34650 }, { "epoch": 24.989185291997117, "grad_norm": 0.15877434611320496, "learning_rate": 2.3603511456483164e-05, "loss": 0.0232, "step": 34660 }, { "epoch": 24.99639509733237, "grad_norm": 0.14747563004493713, "learning_rate": 2.357543178942577e-05, "loss": 0.0263, "step": 34670 }, { "epoch": 25.00360490266763, "grad_norm": 0.18747343122959137, "learning_rate": 2.3547363681372242e-05, "loss": 0.0299, "step": 34680 }, { "epoch": 25.010814708002883, "grad_norm": 0.11817397177219391, "learning_rate": 2.3519307144600565e-05, "loss": 0.0178, "step": 34690 }, { "epoch": 25.01802451333814, "grad_norm": 0.1797778457403183, "learning_rate": 2.3491262191383607e-05, "loss": 0.0279, "step": 34700 }, { "epoch": 25.025234318673395, "grad_norm": 0.13017305731773376, "learning_rate": 2.3463228833989154e-05, "loss": 0.0271, "step": 34710 }, { "epoch": 25.032444124008652, "grad_norm": 0.14650849997997284, "learning_rate": 2.343520708467999e-05, "loss": 0.0243, "step": 34720 }, { "epoch": 25.039653929343906, "grad_norm": 0.15937164425849915, "learning_rate": 2.3407196955713767e-05, "loss": 0.0214, "step": 34730 }, { "epoch": 25.046863734679164, "grad_norm": 0.1442730575799942, "learning_rate": 2.337919845934306e-05, "loss": 0.0279, "step": 34740 }, { "epoch": 25.054073540014418, "grad_norm": 0.20487679541110992, "learning_rate": 2.335121160781535e-05, "loss": 0.0273, "step": 34750 }, { "epoch": 25.061283345349675, "grad_norm": 0.12864960730075836, "learning_rate": 2.3323236413373063e-05, "loss": 0.0249, "step": 34760 }, { "epoch": 25.068493150684933, "grad_norm": 0.15073657035827637, "learning_rate": 2.3295272888253494e-05, "loss": 0.0225, "step": 34770 }, { "epoch": 25.075702956020187, "grad_norm": 0.12026192247867584, "learning_rate": 2.3267321044688815e-05, "loss": 0.0262, "step": 34780 }, { "epoch": 25.082912761355445, "grad_norm": 0.15809081494808197, "learning_rate": 2.323938089490615e-05, "loss": 0.0262, "step": 34790 }, { "epoch": 25.0901225666907, "grad_norm": 0.15008187294006348, "learning_rate": 2.321145245112743e-05, "loss": 0.024, "step": 34800 }, { "epoch": 25.097332372025956, "grad_norm": 0.11410825699567795, "learning_rate": 2.318353572556953e-05, "loss": 0.0316, "step": 34810 }, { "epoch": 25.10454217736121, "grad_norm": 0.14069047570228577, "learning_rate": 2.3155630730444183e-05, "loss": 0.0295, "step": 34820 }, { "epoch": 25.111751982696468, "grad_norm": 0.1602691411972046, "learning_rate": 2.3127737477957968e-05, "loss": 0.0239, "step": 34830 }, { "epoch": 25.118961788031722, "grad_norm": 0.11145729571580887, "learning_rate": 2.3099855980312324e-05, "loss": 0.024, "step": 34840 }, { "epoch": 25.12617159336698, "grad_norm": 0.15474657714366913, "learning_rate": 2.3071986249703602e-05, "loss": 0.0222, "step": 34850 }, { "epoch": 25.133381398702234, "grad_norm": 0.1408185511827469, "learning_rate": 2.3044128298322953e-05, "loss": 0.0234, "step": 34860 }, { "epoch": 25.14059120403749, "grad_norm": 0.18056488037109375, "learning_rate": 2.301628213835637e-05, "loss": 0.0279, "step": 34870 }, { "epoch": 25.147801009372746, "grad_norm": 0.11534275859594345, "learning_rate": 2.2988447781984744e-05, "loss": 0.0196, "step": 34880 }, { "epoch": 25.155010814708003, "grad_norm": 0.10495170205831528, "learning_rate": 2.2960625241383753e-05, "loss": 0.0259, "step": 34890 }, { "epoch": 25.162220620043257, "grad_norm": 0.1348360925912857, "learning_rate": 2.2932814528723922e-05, "loss": 0.0245, "step": 34900 }, { "epoch": 25.169430425378515, "grad_norm": 0.16660036146640778, "learning_rate": 2.2905015656170582e-05, "loss": 0.0256, "step": 34910 }, { "epoch": 25.176640230713772, "grad_norm": 0.1795978993177414, "learning_rate": 2.287722863588394e-05, "loss": 0.024, "step": 34920 }, { "epoch": 25.183850036049026, "grad_norm": 0.19078953564167023, "learning_rate": 2.2849453480018962e-05, "loss": 0.0293, "step": 34930 }, { "epoch": 25.191059841384284, "grad_norm": 0.15251986682415009, "learning_rate": 2.282169020072543e-05, "loss": 0.0273, "step": 34940 }, { "epoch": 25.198269646719538, "grad_norm": 0.13094234466552734, "learning_rate": 2.2793938810147986e-05, "loss": 0.0308, "step": 34950 }, { "epoch": 25.205479452054796, "grad_norm": 0.15525656938552856, "learning_rate": 2.2766199320426007e-05, "loss": 0.0284, "step": 34960 }, { "epoch": 25.21268925739005, "grad_norm": 0.15361107885837555, "learning_rate": 2.2738471743693674e-05, "loss": 0.0233, "step": 34970 }, { "epoch": 25.219899062725307, "grad_norm": 0.1533089131116867, "learning_rate": 2.2710756092080006e-05, "loss": 0.025, "step": 34980 }, { "epoch": 25.22710886806056, "grad_norm": 0.1293765753507614, "learning_rate": 2.2683052377708747e-05, "loss": 0.022, "step": 34990 }, { "epoch": 25.23431867339582, "grad_norm": 0.09071408212184906, "learning_rate": 2.265536061269843e-05, "loss": 0.0251, "step": 35000 }, { "epoch": 25.241528478731073, "grad_norm": 0.15243327617645264, "learning_rate": 2.262768080916241e-05, "loss": 0.0256, "step": 35010 }, { "epoch": 25.24873828406633, "grad_norm": 0.11861243844032288, "learning_rate": 2.260001297920874e-05, "loss": 0.0227, "step": 35020 }, { "epoch": 25.255948089401585, "grad_norm": 0.14352896809577942, "learning_rate": 2.2572357134940297e-05, "loss": 0.024, "step": 35030 }, { "epoch": 25.263157894736842, "grad_norm": 0.17103423178195953, "learning_rate": 2.2544713288454655e-05, "loss": 0.0274, "step": 35040 }, { "epoch": 25.270367700072097, "grad_norm": 0.2537018060684204, "learning_rate": 2.2517081451844213e-05, "loss": 0.0279, "step": 35050 }, { "epoch": 25.277577505407354, "grad_norm": 0.15778353810310364, "learning_rate": 2.248946163719605e-05, "loss": 0.0272, "step": 35060 }, { "epoch": 25.284787310742608, "grad_norm": 0.09262839704751968, "learning_rate": 2.2461853856591996e-05, "loss": 0.0279, "step": 35070 }, { "epoch": 25.291997116077866, "grad_norm": 0.15070779621601105, "learning_rate": 2.2434258122108664e-05, "loss": 0.0298, "step": 35080 }, { "epoch": 25.299206921413123, "grad_norm": 0.12284815311431885, "learning_rate": 2.2406674445817356e-05, "loss": 0.0242, "step": 35090 }, { "epoch": 25.306416726748377, "grad_norm": 0.18141920864582062, "learning_rate": 2.2379102839784083e-05, "loss": 0.0255, "step": 35100 }, { "epoch": 25.313626532083635, "grad_norm": 0.23539501428604126, "learning_rate": 2.2351543316069645e-05, "loss": 0.0237, "step": 35110 }, { "epoch": 25.32083633741889, "grad_norm": 0.15508751571178436, "learning_rate": 2.2323995886729493e-05, "loss": 0.0273, "step": 35120 }, { "epoch": 25.328046142754147, "grad_norm": 0.14864902198314667, "learning_rate": 2.2296460563813786e-05, "loss": 0.0279, "step": 35130 }, { "epoch": 25.3352559480894, "grad_norm": 0.16693583130836487, "learning_rate": 2.2268937359367457e-05, "loss": 0.0287, "step": 35140 }, { "epoch": 25.34246575342466, "grad_norm": 0.15769121050834656, "learning_rate": 2.224142628543006e-05, "loss": 0.0276, "step": 35150 }, { "epoch": 25.349675558759913, "grad_norm": 0.16058944165706635, "learning_rate": 2.2213927354035863e-05, "loss": 0.03, "step": 35160 }, { "epoch": 25.35688536409517, "grad_norm": 0.11322484910488129, "learning_rate": 2.218644057721387e-05, "loss": 0.0254, "step": 35170 }, { "epoch": 25.364095169430424, "grad_norm": 0.15577031672000885, "learning_rate": 2.2158965966987703e-05, "loss": 0.0322, "step": 35180 }, { "epoch": 25.371304974765682, "grad_norm": 0.15463435649871826, "learning_rate": 2.2131503535375687e-05, "loss": 0.0261, "step": 35190 }, { "epoch": 25.378514780100936, "grad_norm": 0.1490086317062378, "learning_rate": 2.2104053294390846e-05, "loss": 0.0252, "step": 35200 }, { "epoch": 25.385724585436193, "grad_norm": 0.14396043121814728, "learning_rate": 2.2076615256040834e-05, "loss": 0.0274, "step": 35210 }, { "epoch": 25.392934390771448, "grad_norm": 0.17184729874134064, "learning_rate": 2.2049189432327966e-05, "loss": 0.0309, "step": 35220 }, { "epoch": 25.400144196106705, "grad_norm": 0.1233995333313942, "learning_rate": 2.202177583524926e-05, "loss": 0.025, "step": 35230 }, { "epoch": 25.407354001441963, "grad_norm": 0.14674435555934906, "learning_rate": 2.1994374476796343e-05, "loss": 0.0239, "step": 35240 }, { "epoch": 25.414563806777217, "grad_norm": 0.10573945939540863, "learning_rate": 2.1966985368955477e-05, "loss": 0.0206, "step": 35250 }, { "epoch": 25.421773612112474, "grad_norm": 0.17425306141376495, "learning_rate": 2.193960852370762e-05, "loss": 0.0287, "step": 35260 }, { "epoch": 25.42898341744773, "grad_norm": 0.12370327115058899, "learning_rate": 2.1912243953028317e-05, "loss": 0.027, "step": 35270 }, { "epoch": 25.436193222782986, "grad_norm": 0.1333025097846985, "learning_rate": 2.1884891668887776e-05, "loss": 0.0302, "step": 35280 }, { "epoch": 25.44340302811824, "grad_norm": 0.1975245177745819, "learning_rate": 2.1857551683250787e-05, "loss": 0.0273, "step": 35290 }, { "epoch": 25.450612833453498, "grad_norm": 0.14069007337093353, "learning_rate": 2.1830224008076827e-05, "loss": 0.0305, "step": 35300 }, { "epoch": 25.457822638788752, "grad_norm": 0.14315049350261688, "learning_rate": 2.1802908655319924e-05, "loss": 0.0261, "step": 35310 }, { "epoch": 25.46503244412401, "grad_norm": 0.17311207950115204, "learning_rate": 2.1775605636928738e-05, "loss": 0.0319, "step": 35320 }, { "epoch": 25.472242249459264, "grad_norm": 0.1466410905122757, "learning_rate": 2.1748314964846555e-05, "loss": 0.0266, "step": 35330 }, { "epoch": 25.47945205479452, "grad_norm": 0.17590700089931488, "learning_rate": 2.172103665101124e-05, "loss": 0.0232, "step": 35340 }, { "epoch": 25.486661860129775, "grad_norm": 0.12027298659086227, "learning_rate": 2.1693770707355226e-05, "loss": 0.0194, "step": 35350 }, { "epoch": 25.493871665465033, "grad_norm": 0.14574559032917023, "learning_rate": 2.1666517145805605e-05, "loss": 0.0233, "step": 35360 }, { "epoch": 25.501081470800287, "grad_norm": 0.1122458204627037, "learning_rate": 2.1639275978283985e-05, "loss": 0.022, "step": 35370 }, { "epoch": 25.508291276135544, "grad_norm": 0.12321050465106964, "learning_rate": 2.1612047216706567e-05, "loss": 0.0262, "step": 35380 }, { "epoch": 25.515501081470802, "grad_norm": 0.1402646154165268, "learning_rate": 2.158483087298417e-05, "loss": 0.0309, "step": 35390 }, { "epoch": 25.522710886806056, "grad_norm": 0.14148163795471191, "learning_rate": 2.155762695902213e-05, "loss": 0.0262, "step": 35400 }, { "epoch": 25.529920692141314, "grad_norm": 0.13417430222034454, "learning_rate": 2.1530435486720363e-05, "loss": 0.0279, "step": 35410 }, { "epoch": 25.537130497476568, "grad_norm": 0.14242936670780182, "learning_rate": 2.1503256467973327e-05, "loss": 0.0255, "step": 35420 }, { "epoch": 25.544340302811825, "grad_norm": 0.14284634590148926, "learning_rate": 2.147608991467008e-05, "loss": 0.0232, "step": 35430 }, { "epoch": 25.55155010814708, "grad_norm": 0.10244669020175934, "learning_rate": 2.144893583869418e-05, "loss": 0.0229, "step": 35440 }, { "epoch": 25.558759913482337, "grad_norm": 0.16672959923744202, "learning_rate": 2.142179425192372e-05, "loss": 0.0261, "step": 35450 }, { "epoch": 25.56596971881759, "grad_norm": 0.13815763592720032, "learning_rate": 2.1394665166231393e-05, "loss": 0.0269, "step": 35460 }, { "epoch": 25.57317952415285, "grad_norm": 0.09588268399238586, "learning_rate": 2.1367548593484364e-05, "loss": 0.0249, "step": 35470 }, { "epoch": 25.580389329488103, "grad_norm": 0.12472722679376602, "learning_rate": 2.134044454554433e-05, "loss": 0.0248, "step": 35480 }, { "epoch": 25.58759913482336, "grad_norm": 0.1337355077266693, "learning_rate": 2.1313353034267548e-05, "loss": 0.0237, "step": 35490 }, { "epoch": 25.594808940158615, "grad_norm": 0.10753187537193298, "learning_rate": 2.1286274071504735e-05, "loss": 0.0207, "step": 35500 }, { "epoch": 25.602018745493872, "grad_norm": 0.158575177192688, "learning_rate": 2.125920766910119e-05, "loss": 0.0282, "step": 35510 }, { "epoch": 25.609228550829126, "grad_norm": 0.14006394147872925, "learning_rate": 2.1232153838896628e-05, "loss": 0.0256, "step": 35520 }, { "epoch": 25.616438356164384, "grad_norm": 0.1334097981452942, "learning_rate": 2.1205112592725366e-05, "loss": 0.0214, "step": 35530 }, { "epoch": 25.623648161499638, "grad_norm": 0.11803636699914932, "learning_rate": 2.1178083942416114e-05, "loss": 0.0199, "step": 35540 }, { "epoch": 25.630857966834895, "grad_norm": 0.08933792263269424, "learning_rate": 2.1151067899792164e-05, "loss": 0.0197, "step": 35550 }, { "epoch": 25.638067772170153, "grad_norm": 0.13897648453712463, "learning_rate": 2.1124064476671235e-05, "loss": 0.0265, "step": 35560 }, { "epoch": 25.645277577505407, "grad_norm": 0.115059033036232, "learning_rate": 2.1097073684865516e-05, "loss": 0.0238, "step": 35570 }, { "epoch": 25.652487382840665, "grad_norm": 0.12244749069213867, "learning_rate": 2.107009553618174e-05, "loss": 0.022, "step": 35580 }, { "epoch": 25.65969718817592, "grad_norm": 0.1350947767496109, "learning_rate": 2.1043130042421044e-05, "loss": 0.0281, "step": 35590 }, { "epoch": 25.666906993511176, "grad_norm": 0.163841113448143, "learning_rate": 2.1016177215379053e-05, "loss": 0.0226, "step": 35600 }, { "epoch": 25.67411679884643, "grad_norm": 0.23371990025043488, "learning_rate": 2.0989237066845824e-05, "loss": 0.0271, "step": 35610 }, { "epoch": 25.681326604181688, "grad_norm": 0.10827260464429855, "learning_rate": 2.096230960860594e-05, "loss": 0.0232, "step": 35620 }, { "epoch": 25.688536409516942, "grad_norm": 0.1587984710931778, "learning_rate": 2.0935394852438366e-05, "loss": 0.0313, "step": 35630 }, { "epoch": 25.6957462148522, "grad_norm": 0.15417024493217468, "learning_rate": 2.090849281011651e-05, "loss": 0.0227, "step": 35640 }, { "epoch": 25.702956020187454, "grad_norm": 0.13233080506324768, "learning_rate": 2.088160349340827e-05, "loss": 0.0255, "step": 35650 }, { "epoch": 25.71016582552271, "grad_norm": 0.12351920455694199, "learning_rate": 2.085472691407594e-05, "loss": 0.0317, "step": 35660 }, { "epoch": 25.717375630857966, "grad_norm": 0.16010834276676178, "learning_rate": 2.082786308387622e-05, "loss": 0.0269, "step": 35670 }, { "epoch": 25.724585436193223, "grad_norm": 0.14559750258922577, "learning_rate": 2.0801012014560306e-05, "loss": 0.0258, "step": 35680 }, { "epoch": 25.731795241528477, "grad_norm": 0.10798285901546478, "learning_rate": 2.0774173717873746e-05, "loss": 0.0292, "step": 35690 }, { "epoch": 25.739005046863735, "grad_norm": 0.1311042755842209, "learning_rate": 2.074734820555651e-05, "loss": 0.0278, "step": 35700 }, { "epoch": 25.74621485219899, "grad_norm": 0.129685640335083, "learning_rate": 2.0720535489343014e-05, "loss": 0.0265, "step": 35710 }, { "epoch": 25.753424657534246, "grad_norm": 0.18226434290409088, "learning_rate": 2.0693735580962044e-05, "loss": 0.0232, "step": 35720 }, { "epoch": 25.760634462869504, "grad_norm": 0.13421574234962463, "learning_rate": 2.0666948492136766e-05, "loss": 0.0242, "step": 35730 }, { "epoch": 25.767844268204758, "grad_norm": 0.17010757327079773, "learning_rate": 2.0640174234584797e-05, "loss": 0.0252, "step": 35740 }, { "epoch": 25.775054073540016, "grad_norm": 0.10874222964048386, "learning_rate": 2.061341282001808e-05, "loss": 0.0252, "step": 35750 }, { "epoch": 25.78226387887527, "grad_norm": 0.15074048936367035, "learning_rate": 2.0586664260142997e-05, "loss": 0.0235, "step": 35760 }, { "epoch": 25.789473684210527, "grad_norm": 0.16354644298553467, "learning_rate": 2.0559928566660237e-05, "loss": 0.0217, "step": 35770 }, { "epoch": 25.79668348954578, "grad_norm": 0.20865745842456818, "learning_rate": 2.0533205751264945e-05, "loss": 0.0269, "step": 35780 }, { "epoch": 25.80389329488104, "grad_norm": 0.1816958338022232, "learning_rate": 2.050649582564656e-05, "loss": 0.026, "step": 35790 }, { "epoch": 25.811103100216293, "grad_norm": 0.13111169636249542, "learning_rate": 2.04797988014889e-05, "loss": 0.0234, "step": 35800 }, { "epoch": 25.81831290555155, "grad_norm": 0.1283712238073349, "learning_rate": 2.0453114690470194e-05, "loss": 0.0211, "step": 35810 }, { "epoch": 25.825522710886805, "grad_norm": 0.15830601751804352, "learning_rate": 2.0426443504262944e-05, "loss": 0.0257, "step": 35820 }, { "epoch": 25.832732516222062, "grad_norm": 0.1443362832069397, "learning_rate": 2.0399785254534027e-05, "loss": 0.023, "step": 35830 }, { "epoch": 25.839942321557317, "grad_norm": 0.1842944324016571, "learning_rate": 2.0373139952944697e-05, "loss": 0.0263, "step": 35840 }, { "epoch": 25.847152126892574, "grad_norm": 0.1382659673690796, "learning_rate": 2.03465076111505e-05, "loss": 0.027, "step": 35850 }, { "epoch": 25.854361932227828, "grad_norm": 0.14303907752037048, "learning_rate": 2.0319888240801305e-05, "loss": 0.0278, "step": 35860 }, { "epoch": 25.861571737563086, "grad_norm": 0.1348443329334259, "learning_rate": 2.0293281853541375e-05, "loss": 0.0236, "step": 35870 }, { "epoch": 25.868781542898343, "grad_norm": 0.126367449760437, "learning_rate": 2.0266688461009216e-05, "loss": 0.023, "step": 35880 }, { "epoch": 25.875991348233597, "grad_norm": 0.1639271378517151, "learning_rate": 2.0240108074837666e-05, "loss": 0.0257, "step": 35890 }, { "epoch": 25.883201153568855, "grad_norm": 0.16235089302062988, "learning_rate": 2.021354070665393e-05, "loss": 0.0271, "step": 35900 }, { "epoch": 25.89041095890411, "grad_norm": 0.13219794631004333, "learning_rate": 2.0186986368079465e-05, "loss": 0.0208, "step": 35910 }, { "epoch": 25.897620764239367, "grad_norm": 0.1647973358631134, "learning_rate": 2.016044507073001e-05, "loss": 0.0287, "step": 35920 }, { "epoch": 25.90483056957462, "grad_norm": 0.15669426321983337, "learning_rate": 2.013391682621567e-05, "loss": 0.0227, "step": 35930 }, { "epoch": 25.91204037490988, "grad_norm": 0.16581450402736664, "learning_rate": 2.0107401646140787e-05, "loss": 0.0261, "step": 35940 }, { "epoch": 25.919250180245132, "grad_norm": 0.1278562694787979, "learning_rate": 2.0080899542104003e-05, "loss": 0.0304, "step": 35950 }, { "epoch": 25.92645998558039, "grad_norm": 0.11095021665096283, "learning_rate": 2.0054410525698215e-05, "loss": 0.0258, "step": 35960 }, { "epoch": 25.933669790915644, "grad_norm": 0.11377903819084167, "learning_rate": 2.0027934608510653e-05, "loss": 0.0206, "step": 35970 }, { "epoch": 25.940879596250902, "grad_norm": 0.12450672686100006, "learning_rate": 2.000147180212275e-05, "loss": 0.0226, "step": 35980 }, { "epoch": 25.948089401586156, "grad_norm": 0.2284003347158432, "learning_rate": 1.9975022118110275e-05, "loss": 0.024, "step": 35990 }, { "epoch": 25.955299206921413, "grad_norm": 0.13246865570545197, "learning_rate": 1.994858556804318e-05, "loss": 0.0269, "step": 36000 }, { "epoch": 25.962509012256668, "grad_norm": 0.12860535085201263, "learning_rate": 1.9922162163485748e-05, "loss": 0.025, "step": 36010 }, { "epoch": 25.969718817591925, "grad_norm": 0.2590573728084564, "learning_rate": 1.989575191599643e-05, "loss": 0.0249, "step": 36020 }, { "epoch": 25.976928622927183, "grad_norm": 0.15844570100307465, "learning_rate": 1.9869354837128012e-05, "loss": 0.0277, "step": 36030 }, { "epoch": 25.984138428262437, "grad_norm": 0.13778021931648254, "learning_rate": 1.984297093842745e-05, "loss": 0.0298, "step": 36040 }, { "epoch": 25.991348233597694, "grad_norm": 0.13253825902938843, "learning_rate": 1.981660023143594e-05, "loss": 0.0246, "step": 36050 }, { "epoch": 25.99855803893295, "grad_norm": 0.12488724291324615, "learning_rate": 1.9790242727688963e-05, "loss": 0.0227, "step": 36060 }, { "epoch": 26.005767844268206, "grad_norm": 0.1897813081741333, "learning_rate": 1.9763898438716165e-05, "loss": 0.0277, "step": 36070 }, { "epoch": 26.01297764960346, "grad_norm": 0.131927028298378, "learning_rate": 1.973756737604142e-05, "loss": 0.0268, "step": 36080 }, { "epoch": 26.020187454938718, "grad_norm": 0.1454305648803711, "learning_rate": 1.9711249551182858e-05, "loss": 0.0278, "step": 36090 }, { "epoch": 26.027397260273972, "grad_norm": 0.20253631472587585, "learning_rate": 1.968494497565278e-05, "loss": 0.0292, "step": 36100 }, { "epoch": 26.03460706560923, "grad_norm": 0.22581391036510468, "learning_rate": 1.9658653660957698e-05, "loss": 0.0323, "step": 36110 }, { "epoch": 26.041816870944483, "grad_norm": 0.20183491706848145, "learning_rate": 1.9632375618598302e-05, "loss": 0.025, "step": 36120 }, { "epoch": 26.04902667627974, "grad_norm": 0.1617654711008072, "learning_rate": 1.960611086006955e-05, "loss": 0.0232, "step": 36130 }, { "epoch": 26.056236481614995, "grad_norm": 0.0936049073934555, "learning_rate": 1.9579859396860513e-05, "loss": 0.0232, "step": 36140 }, { "epoch": 26.063446286950253, "grad_norm": 0.09340464323759079, "learning_rate": 1.9553621240454452e-05, "loss": 0.0246, "step": 36150 }, { "epoch": 26.070656092285507, "grad_norm": 0.15596434473991394, "learning_rate": 1.9527396402328864e-05, "loss": 0.0242, "step": 36160 }, { "epoch": 26.077865897620764, "grad_norm": 0.12282692641019821, "learning_rate": 1.9501184893955376e-05, "loss": 0.0257, "step": 36170 }, { "epoch": 26.08507570295602, "grad_norm": 0.0966605469584465, "learning_rate": 1.9474986726799765e-05, "loss": 0.027, "step": 36180 }, { "epoch": 26.092285508291276, "grad_norm": 0.12806211411952972, "learning_rate": 1.944880191232204e-05, "loss": 0.0222, "step": 36190 }, { "epoch": 26.099495313626534, "grad_norm": 0.0740165263414383, "learning_rate": 1.9422630461976304e-05, "loss": 0.0261, "step": 36200 }, { "epoch": 26.106705118961788, "grad_norm": 0.22862625122070312, "learning_rate": 1.9396472387210835e-05, "loss": 0.0295, "step": 36210 }, { "epoch": 26.113914924297045, "grad_norm": 0.09811149537563324, "learning_rate": 1.9370327699468082e-05, "loss": 0.0208, "step": 36220 }, { "epoch": 26.1211247296323, "grad_norm": 0.13995200395584106, "learning_rate": 1.9344196410184594e-05, "loss": 0.0257, "step": 36230 }, { "epoch": 26.128334534967557, "grad_norm": 0.18551921844482422, "learning_rate": 1.9318078530791123e-05, "loss": 0.0247, "step": 36240 }, { "epoch": 26.13554434030281, "grad_norm": 0.14232125878334045, "learning_rate": 1.9291974072712473e-05, "loss": 0.0249, "step": 36250 }, { "epoch": 26.14275414563807, "grad_norm": 0.12963400781154633, "learning_rate": 1.9265883047367656e-05, "loss": 0.0256, "step": 36260 }, { "epoch": 26.149963950973323, "grad_norm": 0.10664746910333633, "learning_rate": 1.9239805466169748e-05, "loss": 0.0201, "step": 36270 }, { "epoch": 26.15717375630858, "grad_norm": 0.1536785066127777, "learning_rate": 1.9213741340525992e-05, "loss": 0.0282, "step": 36280 }, { "epoch": 26.164383561643834, "grad_norm": 0.15063776075839996, "learning_rate": 1.9187690681837713e-05, "loss": 0.0278, "step": 36290 }, { "epoch": 26.171593366979092, "grad_norm": 0.12028547376394272, "learning_rate": 1.916165350150035e-05, "loss": 0.0309, "step": 36300 }, { "epoch": 26.178803172314346, "grad_norm": 0.12371984869241714, "learning_rate": 1.9135629810903434e-05, "loss": 0.0267, "step": 36310 }, { "epoch": 26.186012977649604, "grad_norm": 0.16543275117874146, "learning_rate": 1.9109619621430648e-05, "loss": 0.0239, "step": 36320 }, { "epoch": 26.193222782984858, "grad_norm": 0.13572341203689575, "learning_rate": 1.9083622944459712e-05, "loss": 0.0249, "step": 36330 }, { "epoch": 26.200432588320115, "grad_norm": 0.1505180150270462, "learning_rate": 1.9057639791362437e-05, "loss": 0.0252, "step": 36340 }, { "epoch": 26.20764239365537, "grad_norm": 0.13257811963558197, "learning_rate": 1.903167017350478e-05, "loss": 0.0334, "step": 36350 }, { "epoch": 26.214852198990627, "grad_norm": 0.1825122982263565, "learning_rate": 1.9005714102246707e-05, "loss": 0.0243, "step": 36360 }, { "epoch": 26.222062004325885, "grad_norm": 0.1341269463300705, "learning_rate": 1.897977158894227e-05, "loss": 0.0253, "step": 36370 }, { "epoch": 26.22927180966114, "grad_norm": 0.1999538689851761, "learning_rate": 1.8953842644939644e-05, "loss": 0.0227, "step": 36380 }, { "epoch": 26.236481614996396, "grad_norm": 0.12935246527194977, "learning_rate": 1.8927927281581015e-05, "loss": 0.0257, "step": 36390 }, { "epoch": 26.24369142033165, "grad_norm": 0.14773237705230713, "learning_rate": 1.890202551020262e-05, "loss": 0.0246, "step": 36400 }, { "epoch": 26.250901225666908, "grad_norm": 0.13877712190151215, "learning_rate": 1.8876137342134813e-05, "loss": 0.0202, "step": 36410 }, { "epoch": 26.258111031002162, "grad_norm": 0.1714971512556076, "learning_rate": 1.885026278870194e-05, "loss": 0.0266, "step": 36420 }, { "epoch": 26.26532083633742, "grad_norm": 0.11844853311777115, "learning_rate": 1.8824401861222384e-05, "loss": 0.0256, "step": 36430 }, { "epoch": 26.272530641672674, "grad_norm": 0.17939789593219757, "learning_rate": 1.879855457100864e-05, "loss": 0.0247, "step": 36440 }, { "epoch": 26.27974044700793, "grad_norm": 0.14085131883621216, "learning_rate": 1.877272092936717e-05, "loss": 0.027, "step": 36450 }, { "epoch": 26.286950252343185, "grad_norm": 0.19811491668224335, "learning_rate": 1.8746900947598467e-05, "loss": 0.0279, "step": 36460 }, { "epoch": 26.294160057678443, "grad_norm": 0.1310824453830719, "learning_rate": 1.87210946369971e-05, "loss": 0.0222, "step": 36470 }, { "epoch": 26.301369863013697, "grad_norm": 0.14493632316589355, "learning_rate": 1.8695302008851602e-05, "loss": 0.0264, "step": 36480 }, { "epoch": 26.308579668348955, "grad_norm": 0.13378839194774628, "learning_rate": 1.8669523074444568e-05, "loss": 0.0233, "step": 36490 }, { "epoch": 26.31578947368421, "grad_norm": 0.12599903345108032, "learning_rate": 1.8643757845052545e-05, "loss": 0.0257, "step": 36500 }, { "epoch": 26.322999279019466, "grad_norm": 0.13123764097690582, "learning_rate": 1.8618006331946164e-05, "loss": 0.0265, "step": 36510 }, { "epoch": 26.330209084354724, "grad_norm": 0.1702425330877304, "learning_rate": 1.859226854638999e-05, "loss": 0.0247, "step": 36520 }, { "epoch": 26.337418889689978, "grad_norm": 0.17541815340518951, "learning_rate": 1.8566544499642587e-05, "loss": 0.0243, "step": 36530 }, { "epoch": 26.344628695025236, "grad_norm": 0.10469325631856918, "learning_rate": 1.854083420295656e-05, "loss": 0.0245, "step": 36540 }, { "epoch": 26.35183850036049, "grad_norm": 0.19155006110668182, "learning_rate": 1.8515137667578453e-05, "loss": 0.0278, "step": 36550 }, { "epoch": 26.359048305695747, "grad_norm": 0.10117001831531525, "learning_rate": 1.8489454904748784e-05, "loss": 0.0248, "step": 36560 }, { "epoch": 26.366258111031, "grad_norm": 0.19407348334789276, "learning_rate": 1.8463785925702094e-05, "loss": 0.0289, "step": 36570 }, { "epoch": 26.37346791636626, "grad_norm": 0.14946910738945007, "learning_rate": 1.843813074166686e-05, "loss": 0.0242, "step": 36580 }, { "epoch": 26.380677721701513, "grad_norm": 0.1090705543756485, "learning_rate": 1.841248936386551e-05, "loss": 0.0268, "step": 36590 }, { "epoch": 26.38788752703677, "grad_norm": 0.1240985170006752, "learning_rate": 1.8386861803514483e-05, "loss": 0.0248, "step": 36600 }, { "epoch": 26.395097332372025, "grad_norm": 0.1627022922039032, "learning_rate": 1.836124807182414e-05, "loss": 0.0254, "step": 36610 }, { "epoch": 26.402307137707282, "grad_norm": 0.1397005021572113, "learning_rate": 1.8335648179998783e-05, "loss": 0.0259, "step": 36620 }, { "epoch": 26.409516943042536, "grad_norm": 0.15254992246627808, "learning_rate": 1.8310062139236667e-05, "loss": 0.0247, "step": 36630 }, { "epoch": 26.416726748377794, "grad_norm": 0.16393710672855377, "learning_rate": 1.828448996073002e-05, "loss": 0.0305, "step": 36640 }, { "epoch": 26.423936553713048, "grad_norm": 0.162037655711174, "learning_rate": 1.825893165566498e-05, "loss": 0.0247, "step": 36650 }, { "epoch": 26.431146359048306, "grad_norm": 0.18312503397464752, "learning_rate": 1.823338723522159e-05, "loss": 0.0271, "step": 36660 }, { "epoch": 26.438356164383563, "grad_norm": 0.2398432344198227, "learning_rate": 1.8207856710573878e-05, "loss": 0.0276, "step": 36670 }, { "epoch": 26.445565969718817, "grad_norm": 0.13092760741710663, "learning_rate": 1.8182340092889756e-05, "loss": 0.0311, "step": 36680 }, { "epoch": 26.452775775054075, "grad_norm": 0.1842450052499771, "learning_rate": 1.8156837393331038e-05, "loss": 0.0284, "step": 36690 }, { "epoch": 26.45998558038933, "grad_norm": 0.16728660464286804, "learning_rate": 1.81313486230535e-05, "loss": 0.0257, "step": 36700 }, { "epoch": 26.467195385724587, "grad_norm": 0.1920023262500763, "learning_rate": 1.8105873793206773e-05, "loss": 0.0267, "step": 36710 }, { "epoch": 26.47440519105984, "grad_norm": 0.18494997918605804, "learning_rate": 1.8080412914934436e-05, "loss": 0.0231, "step": 36720 }, { "epoch": 26.4816149963951, "grad_norm": 0.13964544236660004, "learning_rate": 1.805496599937392e-05, "loss": 0.0261, "step": 36730 }, { "epoch": 26.488824801730352, "grad_norm": 0.1585734486579895, "learning_rate": 1.802953305765659e-05, "loss": 0.0233, "step": 36740 }, { "epoch": 26.49603460706561, "grad_norm": 0.12681278586387634, "learning_rate": 1.8004114100907653e-05, "loss": 0.0252, "step": 36750 }, { "epoch": 26.503244412400864, "grad_norm": 0.15807048976421356, "learning_rate": 1.7978709140246263e-05, "loss": 0.0283, "step": 36760 }, { "epoch": 26.51045421773612, "grad_norm": 0.17110766470432281, "learning_rate": 1.795331818678539e-05, "loss": 0.0293, "step": 36770 }, { "epoch": 26.517664023071376, "grad_norm": 0.11537260562181473, "learning_rate": 1.792794125163188e-05, "loss": 0.0255, "step": 36780 }, { "epoch": 26.524873828406633, "grad_norm": 0.1277586668729782, "learning_rate": 1.7902578345886507e-05, "loss": 0.0229, "step": 36790 }, { "epoch": 26.532083633741887, "grad_norm": 0.20780926942825317, "learning_rate": 1.7877229480643843e-05, "loss": 0.0286, "step": 36800 }, { "epoch": 26.539293439077145, "grad_norm": 0.1848265528678894, "learning_rate": 1.785189466699235e-05, "loss": 0.0253, "step": 36810 }, { "epoch": 26.5465032444124, "grad_norm": 0.11959514766931534, "learning_rate": 1.7826573916014317e-05, "loss": 0.0309, "step": 36820 }, { "epoch": 26.553713049747657, "grad_norm": 0.18878860771656036, "learning_rate": 1.7801267238785935e-05, "loss": 0.0252, "step": 36830 }, { "epoch": 26.560922855082914, "grad_norm": 0.10359735786914825, "learning_rate": 1.7775974646377187e-05, "loss": 0.0253, "step": 36840 }, { "epoch": 26.56813266041817, "grad_norm": 0.1746523678302765, "learning_rate": 1.775069614985189e-05, "loss": 0.0254, "step": 36850 }, { "epoch": 26.575342465753426, "grad_norm": 0.14286336302757263, "learning_rate": 1.7725431760267757e-05, "loss": 0.0256, "step": 36860 }, { "epoch": 26.58255227108868, "grad_norm": 0.15820203721523285, "learning_rate": 1.7700181488676277e-05, "loss": 0.0306, "step": 36870 }, { "epoch": 26.589762076423938, "grad_norm": 0.13194333016872406, "learning_rate": 1.767494534612275e-05, "loss": 0.0217, "step": 36880 }, { "epoch": 26.596971881759192, "grad_norm": 0.15808604657649994, "learning_rate": 1.7649723343646367e-05, "loss": 0.0243, "step": 36890 }, { "epoch": 26.60418168709445, "grad_norm": 0.09000818431377411, "learning_rate": 1.7624515492280063e-05, "loss": 0.0235, "step": 36900 }, { "epoch": 26.611391492429703, "grad_norm": 0.08390951156616211, "learning_rate": 1.7599321803050596e-05, "loss": 0.0296, "step": 36910 }, { "epoch": 26.61860129776496, "grad_norm": 0.13386297225952148, "learning_rate": 1.7574142286978574e-05, "loss": 0.0251, "step": 36920 }, { "epoch": 26.625811103100215, "grad_norm": 0.16204406321048737, "learning_rate": 1.7548976955078354e-05, "loss": 0.0226, "step": 36930 }, { "epoch": 26.633020908435473, "grad_norm": 0.14251220226287842, "learning_rate": 1.7523825818358092e-05, "loss": 0.0273, "step": 36940 }, { "epoch": 26.640230713770727, "grad_norm": 0.13075628876686096, "learning_rate": 1.7498688887819785e-05, "loss": 0.0225, "step": 36950 }, { "epoch": 26.647440519105984, "grad_norm": 0.09136246144771576, "learning_rate": 1.7473566174459143e-05, "loss": 0.0233, "step": 36960 }, { "epoch": 26.65465032444124, "grad_norm": 0.14463971555233002, "learning_rate": 1.7448457689265735e-05, "loss": 0.0249, "step": 36970 }, { "epoch": 26.661860129776496, "grad_norm": 0.14323966205120087, "learning_rate": 1.7423363443222823e-05, "loss": 0.0246, "step": 36980 }, { "epoch": 26.66906993511175, "grad_norm": 0.13858337700366974, "learning_rate": 1.7398283447307524e-05, "loss": 0.0271, "step": 36990 }, { "epoch": 26.676279740447008, "grad_norm": 0.19951151311397552, "learning_rate": 1.737321771249066e-05, "loss": 0.0265, "step": 37000 }, { "epoch": 26.683489545782265, "grad_norm": 0.15429435670375824, "learning_rate": 1.7348166249736814e-05, "loss": 0.0267, "step": 37010 }, { "epoch": 26.69069935111752, "grad_norm": 0.1644938588142395, "learning_rate": 1.732312907000439e-05, "loss": 0.0241, "step": 37020 }, { "epoch": 26.697909156452777, "grad_norm": 0.1144598051905632, "learning_rate": 1.7298106184245478e-05, "loss": 0.0258, "step": 37030 }, { "epoch": 26.70511896178803, "grad_norm": 0.19633440673351288, "learning_rate": 1.727309760340592e-05, "loss": 0.0308, "step": 37040 }, { "epoch": 26.71232876712329, "grad_norm": 0.16057650744915009, "learning_rate": 1.724810333842536e-05, "loss": 0.0249, "step": 37050 }, { "epoch": 26.719538572458543, "grad_norm": 0.1491532325744629, "learning_rate": 1.7223123400237112e-05, "loss": 0.0275, "step": 37060 }, { "epoch": 26.7267483777938, "grad_norm": 0.15630242228507996, "learning_rate": 1.7198157799768238e-05, "loss": 0.0242, "step": 37070 }, { "epoch": 26.733958183129054, "grad_norm": 0.14978279173374176, "learning_rate": 1.7173206547939576e-05, "loss": 0.024, "step": 37080 }, { "epoch": 26.741167988464312, "grad_norm": 0.1811758428812027, "learning_rate": 1.7148269655665628e-05, "loss": 0.0246, "step": 37090 }, { "epoch": 26.748377793799566, "grad_norm": 0.12243153899908066, "learning_rate": 1.712334713385463e-05, "loss": 0.0302, "step": 37100 }, { "epoch": 26.755587599134824, "grad_norm": 0.17012012004852295, "learning_rate": 1.709843899340856e-05, "loss": 0.0232, "step": 37110 }, { "epoch": 26.762797404470078, "grad_norm": 0.12421667575836182, "learning_rate": 1.7073545245223084e-05, "loss": 0.0273, "step": 37120 }, { "epoch": 26.770007209805335, "grad_norm": 0.14568638801574707, "learning_rate": 1.704866590018755e-05, "loss": 0.027, "step": 37130 }, { "epoch": 26.77721701514059, "grad_norm": 0.2244991511106491, "learning_rate": 1.7023800969185054e-05, "loss": 0.0261, "step": 37140 }, { "epoch": 26.784426820475847, "grad_norm": 0.12756185233592987, "learning_rate": 1.6998950463092354e-05, "loss": 0.0228, "step": 37150 }, { "epoch": 26.791636625811105, "grad_norm": 0.1448517143726349, "learning_rate": 1.6974114392779907e-05, "loss": 0.0305, "step": 37160 }, { "epoch": 26.79884643114636, "grad_norm": 0.12078554183244705, "learning_rate": 1.694929276911183e-05, "loss": 0.0213, "step": 37170 }, { "epoch": 26.806056236481616, "grad_norm": 0.12357048690319061, "learning_rate": 1.6924485602945983e-05, "loss": 0.0209, "step": 37180 }, { "epoch": 26.81326604181687, "grad_norm": 0.14024315774440765, "learning_rate": 1.6899692905133825e-05, "loss": 0.0228, "step": 37190 }, { "epoch": 26.820475847152128, "grad_norm": 0.15633532404899597, "learning_rate": 1.6874914686520565e-05, "loss": 0.0273, "step": 37200 }, { "epoch": 26.827685652487382, "grad_norm": 0.165896475315094, "learning_rate": 1.6850150957945e-05, "loss": 0.0239, "step": 37210 }, { "epoch": 26.83489545782264, "grad_norm": 0.13168743252754211, "learning_rate": 1.682540173023966e-05, "loss": 0.0301, "step": 37220 }, { "epoch": 26.842105263157894, "grad_norm": 0.11441376805305481, "learning_rate": 1.6800667014230665e-05, "loss": 0.0223, "step": 37230 }, { "epoch": 26.84931506849315, "grad_norm": 0.14626438915729523, "learning_rate": 1.677594682073785e-05, "loss": 0.0232, "step": 37240 }, { "epoch": 26.856524873828405, "grad_norm": 0.1560647189617157, "learning_rate": 1.6751241160574653e-05, "loss": 0.0221, "step": 37250 }, { "epoch": 26.863734679163663, "grad_norm": 0.12225737422704697, "learning_rate": 1.6726550044548156e-05, "loss": 0.0242, "step": 37260 }, { "epoch": 26.870944484498917, "grad_norm": 0.18344709277153015, "learning_rate": 1.6701873483459125e-05, "loss": 0.024, "step": 37270 }, { "epoch": 26.878154289834175, "grad_norm": 0.16423483192920685, "learning_rate": 1.6677211488101906e-05, "loss": 0.0209, "step": 37280 }, { "epoch": 26.88536409516943, "grad_norm": 0.1773700714111328, "learning_rate": 1.6652564069264475e-05, "loss": 0.0276, "step": 37290 }, { "epoch": 26.892573900504686, "grad_norm": 0.1478123515844345, "learning_rate": 1.662793123772849e-05, "loss": 0.0226, "step": 37300 }, { "epoch": 26.899783705839944, "grad_norm": 0.12803874909877777, "learning_rate": 1.6603313004269168e-05, "loss": 0.0206, "step": 37310 }, { "epoch": 26.906993511175198, "grad_norm": 0.15357472002506256, "learning_rate": 1.657870937965536e-05, "loss": 0.0229, "step": 37320 }, { "epoch": 26.914203316510456, "grad_norm": 0.1087171733379364, "learning_rate": 1.655412037464952e-05, "loss": 0.0257, "step": 37330 }, { "epoch": 26.92141312184571, "grad_norm": 0.1514424979686737, "learning_rate": 1.652954600000773e-05, "loss": 0.0197, "step": 37340 }, { "epoch": 26.928622927180967, "grad_norm": 0.15000270307064056, "learning_rate": 1.650498626647965e-05, "loss": 0.0247, "step": 37350 }, { "epoch": 26.93583273251622, "grad_norm": 0.18856748938560486, "learning_rate": 1.6480441184808526e-05, "loss": 0.0264, "step": 37360 }, { "epoch": 26.94304253785148, "grad_norm": 0.16849173605442047, "learning_rate": 1.6455910765731242e-05, "loss": 0.0223, "step": 37370 }, { "epoch": 26.950252343186733, "grad_norm": 0.1295306384563446, "learning_rate": 1.6431395019978217e-05, "loss": 0.0215, "step": 37380 }, { "epoch": 26.95746214852199, "grad_norm": 0.1279909759759903, "learning_rate": 1.6406893958273467e-05, "loss": 0.0225, "step": 37390 }, { "epoch": 26.964671953857245, "grad_norm": 0.11740640550851822, "learning_rate": 1.6382407591334602e-05, "loss": 0.0253, "step": 37400 }, { "epoch": 26.971881759192502, "grad_norm": 0.1380247324705124, "learning_rate": 1.635793592987279e-05, "loss": 0.0244, "step": 37410 }, { "epoch": 26.979091564527756, "grad_norm": 0.11244496703147888, "learning_rate": 1.633347898459275e-05, "loss": 0.0237, "step": 37420 }, { "epoch": 26.986301369863014, "grad_norm": 0.1475248783826828, "learning_rate": 1.6309036766192805e-05, "loss": 0.0287, "step": 37430 }, { "epoch": 26.993511175198268, "grad_norm": 0.18084853887557983, "learning_rate": 1.628460928536478e-05, "loss": 0.0268, "step": 37440 }, { "epoch": 27.000720980533526, "grad_norm": 0.12987211346626282, "learning_rate": 1.6260196552794128e-05, "loss": 0.0245, "step": 37450 }, { "epoch": 27.00793078586878, "grad_norm": 0.1458563208580017, "learning_rate": 1.6235798579159766e-05, "loss": 0.0254, "step": 37460 }, { "epoch": 27.015140591204037, "grad_norm": 0.12139689177274704, "learning_rate": 1.6211415375134233e-05, "loss": 0.0234, "step": 37470 }, { "epoch": 27.022350396539295, "grad_norm": 0.1656932830810547, "learning_rate": 1.618704695138353e-05, "loss": 0.0215, "step": 37480 }, { "epoch": 27.02956020187455, "grad_norm": 0.1734919399023056, "learning_rate": 1.6162693318567273e-05, "loss": 0.0317, "step": 37490 }, { "epoch": 27.036770007209807, "grad_norm": 0.12716792523860931, "learning_rate": 1.613835448733856e-05, "loss": 0.0203, "step": 37500 }, { "epoch": 27.04397981254506, "grad_norm": 0.10275028645992279, "learning_rate": 1.6114030468344006e-05, "loss": 0.0287, "step": 37510 }, { "epoch": 27.05118961788032, "grad_norm": 0.1608656495809555, "learning_rate": 1.6089721272223755e-05, "loss": 0.025, "step": 37520 }, { "epoch": 27.058399423215572, "grad_norm": 0.17565599083900452, "learning_rate": 1.606542690961151e-05, "loss": 0.0229, "step": 37530 }, { "epoch": 27.06560922855083, "grad_norm": 0.14291226863861084, "learning_rate": 1.6041147391134438e-05, "loss": 0.022, "step": 37540 }, { "epoch": 27.072819033886084, "grad_norm": 0.1151362806558609, "learning_rate": 1.6016882727413196e-05, "loss": 0.0234, "step": 37550 }, { "epoch": 27.08002883922134, "grad_norm": 0.12316238135099411, "learning_rate": 1.5992632929062014e-05, "loss": 0.0246, "step": 37560 }, { "epoch": 27.087238644556596, "grad_norm": 0.12237080186605453, "learning_rate": 1.5968398006688557e-05, "loss": 0.0247, "step": 37570 }, { "epoch": 27.094448449891853, "grad_norm": 0.14760801196098328, "learning_rate": 1.594417797089399e-05, "loss": 0.0255, "step": 37580 }, { "epoch": 27.101658255227107, "grad_norm": 0.19406653940677643, "learning_rate": 1.5919972832273015e-05, "loss": 0.0278, "step": 37590 }, { "epoch": 27.108868060562365, "grad_norm": 0.19419479370117188, "learning_rate": 1.5895782601413767e-05, "loss": 0.0208, "step": 37600 }, { "epoch": 27.11607786589762, "grad_norm": 0.16161109507083893, "learning_rate": 1.587160728889785e-05, "loss": 0.03, "step": 37610 }, { "epoch": 27.123287671232877, "grad_norm": 0.18447650969028473, "learning_rate": 1.58474469053004e-05, "loss": 0.0258, "step": 37620 }, { "epoch": 27.130497476568134, "grad_norm": 0.09694988280534744, "learning_rate": 1.5823301461189976e-05, "loss": 0.025, "step": 37630 }, { "epoch": 27.13770728190339, "grad_norm": 0.11180981993675232, "learning_rate": 1.5799170967128606e-05, "loss": 0.0258, "step": 37640 }, { "epoch": 27.144917087238646, "grad_norm": 0.14183774590492249, "learning_rate": 1.5775055433671814e-05, "loss": 0.0228, "step": 37650 }, { "epoch": 27.1521268925739, "grad_norm": 0.13971549272537231, "learning_rate": 1.5750954871368535e-05, "loss": 0.0272, "step": 37660 }, { "epoch": 27.159336697909158, "grad_norm": 0.16884134709835052, "learning_rate": 1.5726869290761158e-05, "loss": 0.024, "step": 37670 }, { "epoch": 27.166546503244412, "grad_norm": 0.17598788440227509, "learning_rate": 1.5702798702385574e-05, "loss": 0.0241, "step": 37680 }, { "epoch": 27.17375630857967, "grad_norm": 0.15437670052051544, "learning_rate": 1.5678743116771038e-05, "loss": 0.0296, "step": 37690 }, { "epoch": 27.180966113914923, "grad_norm": 0.1668752282857895, "learning_rate": 1.5654702544440313e-05, "loss": 0.0238, "step": 37700 }, { "epoch": 27.18817591925018, "grad_norm": 0.1685013771057129, "learning_rate": 1.5630676995909532e-05, "loss": 0.026, "step": 37710 }, { "epoch": 27.195385724585435, "grad_norm": 0.15047064423561096, "learning_rate": 1.560666648168832e-05, "loss": 0.0283, "step": 37720 }, { "epoch": 27.202595529920693, "grad_norm": 0.17195719480514526, "learning_rate": 1.5582671012279663e-05, "loss": 0.0272, "step": 37730 }, { "epoch": 27.209805335255947, "grad_norm": 0.20497913658618927, "learning_rate": 1.5558690598179997e-05, "loss": 0.0248, "step": 37740 }, { "epoch": 27.217015140591204, "grad_norm": 0.2059265524148941, "learning_rate": 1.553472524987919e-05, "loss": 0.0244, "step": 37750 }, { "epoch": 27.22422494592646, "grad_norm": 0.19334623217582703, "learning_rate": 1.5510774977860485e-05, "loss": 0.0213, "step": 37760 }, { "epoch": 27.231434751261716, "grad_norm": 0.15294618904590607, "learning_rate": 1.5486839792600532e-05, "loss": 0.0245, "step": 37770 }, { "epoch": 27.23864455659697, "grad_norm": 0.17158572375774384, "learning_rate": 1.546291970456942e-05, "loss": 0.0274, "step": 37780 }, { "epoch": 27.245854361932228, "grad_norm": 0.1434738039970398, "learning_rate": 1.5439014724230606e-05, "loss": 0.0223, "step": 37790 }, { "epoch": 27.253064167267485, "grad_norm": 0.0987953245639801, "learning_rate": 1.5415124862040907e-05, "loss": 0.0204, "step": 37800 }, { "epoch": 27.26027397260274, "grad_norm": 0.1383799910545349, "learning_rate": 1.53912501284506e-05, "loss": 0.0274, "step": 37810 }, { "epoch": 27.267483777937997, "grad_norm": 0.1188850924372673, "learning_rate": 1.5367390533903292e-05, "loss": 0.0226, "step": 37820 }, { "epoch": 27.27469358327325, "grad_norm": 0.14018891751766205, "learning_rate": 1.5343546088835964e-05, "loss": 0.0241, "step": 37830 }, { "epoch": 27.28190338860851, "grad_norm": 0.15436245501041412, "learning_rate": 1.531971680367901e-05, "loss": 0.0249, "step": 37840 }, { "epoch": 27.289113193943763, "grad_norm": 0.1126566156744957, "learning_rate": 1.529590268885616e-05, "loss": 0.0255, "step": 37850 }, { "epoch": 27.29632299927902, "grad_norm": 0.09054429829120636, "learning_rate": 1.5272103754784517e-05, "loss": 0.0248, "step": 37860 }, { "epoch": 27.303532804614274, "grad_norm": 0.11438952386379242, "learning_rate": 1.5248320011874523e-05, "loss": 0.0257, "step": 37870 }, { "epoch": 27.310742609949532, "grad_norm": 0.20106934010982513, "learning_rate": 1.5224551470530035e-05, "loss": 0.0258, "step": 37880 }, { "epoch": 27.317952415284786, "grad_norm": 0.13830797374248505, "learning_rate": 1.5200798141148193e-05, "loss": 0.0246, "step": 37890 }, { "epoch": 27.325162220620044, "grad_norm": 0.14395542442798615, "learning_rate": 1.5177060034119505e-05, "loss": 0.0239, "step": 37900 }, { "epoch": 27.332372025955298, "grad_norm": 0.1358172595500946, "learning_rate": 1.5153337159827835e-05, "loss": 0.0237, "step": 37910 }, { "epoch": 27.339581831290555, "grad_norm": 0.09246443212032318, "learning_rate": 1.5129629528650397e-05, "loss": 0.0258, "step": 37920 }, { "epoch": 27.34679163662581, "grad_norm": 0.1560167819261551, "learning_rate": 1.5105937150957671e-05, "loss": 0.0244, "step": 37930 }, { "epoch": 27.354001441961067, "grad_norm": 0.10376901179552078, "learning_rate": 1.5082260037113548e-05, "loss": 0.0291, "step": 37940 }, { "epoch": 27.361211247296325, "grad_norm": 0.17540685832500458, "learning_rate": 1.5058598197475187e-05, "loss": 0.0267, "step": 37950 }, { "epoch": 27.36842105263158, "grad_norm": 0.16462992131710052, "learning_rate": 1.5034951642393064e-05, "loss": 0.0231, "step": 37960 }, { "epoch": 27.375630857966836, "grad_norm": 0.16234256327152252, "learning_rate": 1.5011320382211013e-05, "loss": 0.0266, "step": 37970 }, { "epoch": 27.38284066330209, "grad_norm": 0.10908249020576477, "learning_rate": 1.4987704427266136e-05, "loss": 0.0223, "step": 37980 }, { "epoch": 27.390050468637348, "grad_norm": 0.1270713061094284, "learning_rate": 1.4964103787888839e-05, "loss": 0.0255, "step": 37990 }, { "epoch": 27.397260273972602, "grad_norm": 0.16051828861236572, "learning_rate": 1.4940518474402882e-05, "loss": 0.0259, "step": 38000 }, { "epoch": 27.40447007930786, "grad_norm": 0.12206751108169556, "learning_rate": 1.4916948497125249e-05, "loss": 0.0233, "step": 38010 }, { "epoch": 27.411679884643114, "grad_norm": 0.13336457312107086, "learning_rate": 1.4893393866366267e-05, "loss": 0.0269, "step": 38020 }, { "epoch": 27.41888968997837, "grad_norm": 0.15085451304912567, "learning_rate": 1.4869854592429506e-05, "loss": 0.0239, "step": 38030 }, { "epoch": 27.426099495313625, "grad_norm": 0.10103815048933029, "learning_rate": 1.4846330685611875e-05, "loss": 0.0199, "step": 38040 }, { "epoch": 27.433309300648883, "grad_norm": 0.1300446093082428, "learning_rate": 1.482282215620352e-05, "loss": 0.0217, "step": 38050 }, { "epoch": 27.440519105984137, "grad_norm": 0.13722479343414307, "learning_rate": 1.4799329014487857e-05, "loss": 0.0225, "step": 38060 }, { "epoch": 27.447728911319395, "grad_norm": 0.1332329958677292, "learning_rate": 1.4775851270741603e-05, "loss": 0.0249, "step": 38070 }, { "epoch": 27.45493871665465, "grad_norm": 0.18797942996025085, "learning_rate": 1.4752388935234719e-05, "loss": 0.0266, "step": 38080 }, { "epoch": 27.462148521989906, "grad_norm": 0.24891722202301025, "learning_rate": 1.4728942018230401e-05, "loss": 0.024, "step": 38090 }, { "epoch": 27.46935832732516, "grad_norm": 0.11400054395198822, "learning_rate": 1.4705510529985167e-05, "loss": 0.0256, "step": 38100 }, { "epoch": 27.476568132660418, "grad_norm": 0.18099883198738098, "learning_rate": 1.4682094480748726e-05, "loss": 0.0263, "step": 38110 }, { "epoch": 27.483777937995676, "grad_norm": 0.14154577255249023, "learning_rate": 1.4658693880764035e-05, "loss": 0.0259, "step": 38120 }, { "epoch": 27.49098774333093, "grad_norm": 0.1409052312374115, "learning_rate": 1.463530874026735e-05, "loss": 0.0298, "step": 38130 }, { "epoch": 27.498197548666187, "grad_norm": 0.12194864451885223, "learning_rate": 1.461193906948809e-05, "loss": 0.0289, "step": 38140 }, { "epoch": 27.50540735400144, "grad_norm": 0.15111027657985687, "learning_rate": 1.4588584878648975e-05, "loss": 0.0231, "step": 38150 }, { "epoch": 27.5126171593367, "grad_norm": 0.20521000027656555, "learning_rate": 1.4565246177965896e-05, "loss": 0.0241, "step": 38160 }, { "epoch": 27.519826964671953, "grad_norm": 0.21664856374263763, "learning_rate": 1.4541922977648015e-05, "loss": 0.0283, "step": 38170 }, { "epoch": 27.52703677000721, "grad_norm": 0.10845460742712021, "learning_rate": 1.451861528789768e-05, "loss": 0.0239, "step": 38180 }, { "epoch": 27.534246575342465, "grad_norm": 0.11950419843196869, "learning_rate": 1.4495323118910454e-05, "loss": 0.0262, "step": 38190 }, { "epoch": 27.541456380677722, "grad_norm": 0.16198284924030304, "learning_rate": 1.4472046480875145e-05, "loss": 0.0274, "step": 38200 }, { "epoch": 27.548666186012976, "grad_norm": 0.17913579940795898, "learning_rate": 1.4448785383973745e-05, "loss": 0.0293, "step": 38210 }, { "epoch": 27.555875991348234, "grad_norm": 0.18837566673755646, "learning_rate": 1.4425539838381418e-05, "loss": 0.0272, "step": 38220 }, { "epoch": 27.563085796683488, "grad_norm": 0.15160194039344788, "learning_rate": 1.4402309854266594e-05, "loss": 0.0278, "step": 38230 }, { "epoch": 27.570295602018746, "grad_norm": 0.21659117937088013, "learning_rate": 1.4379095441790846e-05, "loss": 0.026, "step": 38240 }, { "epoch": 27.577505407354, "grad_norm": 0.10604346543550491, "learning_rate": 1.4355896611108921e-05, "loss": 0.0236, "step": 38250 }, { "epoch": 27.584715212689257, "grad_norm": 0.1462348997592926, "learning_rate": 1.433271337236881e-05, "loss": 0.0263, "step": 38260 }, { "epoch": 27.591925018024515, "grad_norm": 0.1275828778743744, "learning_rate": 1.4309545735711638e-05, "loss": 0.0253, "step": 38270 }, { "epoch": 27.59913482335977, "grad_norm": 0.14913874864578247, "learning_rate": 1.4286393711271696e-05, "loss": 0.026, "step": 38280 }, { "epoch": 27.606344628695027, "grad_norm": 0.12960505485534668, "learning_rate": 1.4263257309176497e-05, "loss": 0.0248, "step": 38290 }, { "epoch": 27.61355443403028, "grad_norm": 0.1282213181257248, "learning_rate": 1.4240136539546678e-05, "loss": 0.0276, "step": 38300 }, { "epoch": 27.62076423936554, "grad_norm": 0.08705377578735352, "learning_rate": 1.4217031412496029e-05, "loss": 0.0303, "step": 38310 }, { "epoch": 27.627974044700792, "grad_norm": 0.2184276580810547, "learning_rate": 1.4193941938131554e-05, "loss": 0.024, "step": 38320 }, { "epoch": 27.63518385003605, "grad_norm": 0.10764453560113907, "learning_rate": 1.4170868126553356e-05, "loss": 0.0269, "step": 38330 }, { "epoch": 27.642393655371304, "grad_norm": 0.1567414253950119, "learning_rate": 1.4147809987854682e-05, "loss": 0.022, "step": 38340 }, { "epoch": 27.64960346070656, "grad_norm": 0.1290864795446396, "learning_rate": 1.4124767532121985e-05, "loss": 0.0223, "step": 38350 }, { "epoch": 27.656813266041816, "grad_norm": 0.20151564478874207, "learning_rate": 1.41017407694348e-05, "loss": 0.023, "step": 38360 }, { "epoch": 27.664023071377073, "grad_norm": 0.1869264841079712, "learning_rate": 1.4078729709865801e-05, "loss": 0.0282, "step": 38370 }, { "epoch": 27.671232876712327, "grad_norm": 0.15026257932186127, "learning_rate": 1.4055734363480833e-05, "loss": 0.0238, "step": 38380 }, { "epoch": 27.678442682047585, "grad_norm": 0.13604602217674255, "learning_rate": 1.4032754740338815e-05, "loss": 0.0254, "step": 38390 }, { "epoch": 27.68565248738284, "grad_norm": 0.1580410599708557, "learning_rate": 1.4009790850491844e-05, "loss": 0.0252, "step": 38400 }, { "epoch": 27.692862292718097, "grad_norm": 0.12583449482917786, "learning_rate": 1.3986842703985076e-05, "loss": 0.0311, "step": 38410 }, { "epoch": 27.700072098053354, "grad_norm": 0.14037199318408966, "learning_rate": 1.3963910310856842e-05, "loss": 0.0243, "step": 38420 }, { "epoch": 27.70728190338861, "grad_norm": 0.1435457170009613, "learning_rate": 1.394099368113853e-05, "loss": 0.0218, "step": 38430 }, { "epoch": 27.714491708723866, "grad_norm": 0.4349953532218933, "learning_rate": 1.3918092824854644e-05, "loss": 0.0249, "step": 38440 }, { "epoch": 27.72170151405912, "grad_norm": 0.1788967102766037, "learning_rate": 1.3895207752022821e-05, "loss": 0.023, "step": 38450 }, { "epoch": 27.728911319394378, "grad_norm": 0.22997036576271057, "learning_rate": 1.3872338472653756e-05, "loss": 0.025, "step": 38460 }, { "epoch": 27.73612112472963, "grad_norm": 0.11974825710058212, "learning_rate": 1.3849484996751233e-05, "loss": 0.0253, "step": 38470 }, { "epoch": 27.74333093006489, "grad_norm": 0.11716224253177643, "learning_rate": 1.382664733431217e-05, "loss": 0.0254, "step": 38480 }, { "epoch": 27.750540735400143, "grad_norm": 0.11959574371576309, "learning_rate": 1.3803825495326522e-05, "loss": 0.0225, "step": 38490 }, { "epoch": 27.7577505407354, "grad_norm": 0.0781959593296051, "learning_rate": 1.3781019489777324e-05, "loss": 0.0284, "step": 38500 }, { "epoch": 27.764960346070655, "grad_norm": 0.22198350727558136, "learning_rate": 1.375822932764072e-05, "loss": 0.0253, "step": 38510 }, { "epoch": 27.772170151405913, "grad_norm": 0.1303657442331314, "learning_rate": 1.373545501888589e-05, "loss": 0.032, "step": 38520 }, { "epoch": 27.779379956741167, "grad_norm": 0.15123780071735382, "learning_rate": 1.3712696573475092e-05, "loss": 0.0284, "step": 38530 }, { "epoch": 27.786589762076424, "grad_norm": 0.10649798065423965, "learning_rate": 1.368995400136363e-05, "loss": 0.0237, "step": 38540 }, { "epoch": 27.79379956741168, "grad_norm": 0.1142822876572609, "learning_rate": 1.3667227312499903e-05, "loss": 0.0229, "step": 38550 }, { "epoch": 27.801009372746936, "grad_norm": 0.08773733675479889, "learning_rate": 1.3644516516825323e-05, "loss": 0.0247, "step": 38560 }, { "epoch": 27.80821917808219, "grad_norm": 0.19107092916965485, "learning_rate": 1.3621821624274356e-05, "loss": 0.0253, "step": 38570 }, { "epoch": 27.815428983417448, "grad_norm": 0.12392161786556244, "learning_rate": 1.3599142644774537e-05, "loss": 0.03, "step": 38580 }, { "epoch": 27.822638788752705, "grad_norm": 0.16304005682468414, "learning_rate": 1.357647958824641e-05, "loss": 0.0251, "step": 38590 }, { "epoch": 27.82984859408796, "grad_norm": 0.10731256753206253, "learning_rate": 1.3553832464603555e-05, "loss": 0.0254, "step": 38600 }, { "epoch": 27.837058399423217, "grad_norm": 0.12494955956935883, "learning_rate": 1.3531201283752625e-05, "loss": 0.0277, "step": 38610 }, { "epoch": 27.84426820475847, "grad_norm": 0.15689249336719513, "learning_rate": 1.350858605559323e-05, "loss": 0.0245, "step": 38620 }, { "epoch": 27.85147801009373, "grad_norm": 0.1382516771554947, "learning_rate": 1.348598679001808e-05, "loss": 0.0244, "step": 38630 }, { "epoch": 27.858687815428983, "grad_norm": 0.0941573828458786, "learning_rate": 1.3463403496912819e-05, "loss": 0.0247, "step": 38640 }, { "epoch": 27.86589762076424, "grad_norm": 0.18208760023117065, "learning_rate": 1.3440836186156187e-05, "loss": 0.0282, "step": 38650 }, { "epoch": 27.873107426099494, "grad_norm": 0.16417841613292694, "learning_rate": 1.3418284867619852e-05, "loss": 0.0262, "step": 38660 }, { "epoch": 27.880317231434752, "grad_norm": 0.10663869976997375, "learning_rate": 1.3395749551168567e-05, "loss": 0.0232, "step": 38670 }, { "epoch": 27.887527036770006, "grad_norm": 0.1317099630832672, "learning_rate": 1.3373230246660024e-05, "loss": 0.0224, "step": 38680 }, { "epoch": 27.894736842105264, "grad_norm": 0.15935267508029938, "learning_rate": 1.335072696394492e-05, "loss": 0.0239, "step": 38690 }, { "epoch": 27.901946647440518, "grad_norm": 0.11736814677715302, "learning_rate": 1.3328239712866986e-05, "loss": 0.0262, "step": 38700 }, { "epoch": 27.909156452775775, "grad_norm": 0.16681133210659027, "learning_rate": 1.3305768503262889e-05, "loss": 0.0244, "step": 38710 }, { "epoch": 27.91636625811103, "grad_norm": 0.12676528096199036, "learning_rate": 1.3283313344962305e-05, "loss": 0.0241, "step": 38720 }, { "epoch": 27.923576063446287, "grad_norm": 0.16378562152385712, "learning_rate": 1.3260874247787863e-05, "loss": 0.0252, "step": 38730 }, { "epoch": 27.93078586878154, "grad_norm": 0.13531140983104706, "learning_rate": 1.3238451221555226e-05, "loss": 0.0241, "step": 38740 }, { "epoch": 27.9379956741168, "grad_norm": 0.19030678272247314, "learning_rate": 1.3216044276072965e-05, "loss": 0.0288, "step": 38750 }, { "epoch": 27.945205479452056, "grad_norm": 0.1213497593998909, "learning_rate": 1.3193653421142622e-05, "loss": 0.0298, "step": 38760 }, { "epoch": 27.95241528478731, "grad_norm": 0.1486707329750061, "learning_rate": 1.3171278666558756e-05, "loss": 0.0242, "step": 38770 }, { "epoch": 27.959625090122568, "grad_norm": 0.1277139037847519, "learning_rate": 1.3148920022108819e-05, "loss": 0.0268, "step": 38780 }, { "epoch": 27.966834895457822, "grad_norm": 0.1554286777973175, "learning_rate": 1.312657749757324e-05, "loss": 0.0252, "step": 38790 }, { "epoch": 27.97404470079308, "grad_norm": 0.12167059630155563, "learning_rate": 1.3104251102725412e-05, "loss": 0.0252, "step": 38800 }, { "epoch": 27.981254506128334, "grad_norm": 0.17804603278636932, "learning_rate": 1.3081940847331659e-05, "loss": 0.03, "step": 38810 }, { "epoch": 27.98846431146359, "grad_norm": 0.16690266132354736, "learning_rate": 1.3059646741151222e-05, "loss": 0.0213, "step": 38820 }, { "epoch": 27.995674116798845, "grad_norm": 0.17076529562473297, "learning_rate": 1.3037368793936334e-05, "loss": 0.0253, "step": 38830 }, { "epoch": 28.002883922134103, "grad_norm": 0.12503398954868317, "learning_rate": 1.3015107015432104e-05, "loss": 0.0295, "step": 38840 }, { "epoch": 28.010093727469357, "grad_norm": 0.14736123383045197, "learning_rate": 1.2992861415376584e-05, "loss": 0.0252, "step": 38850 }, { "epoch": 28.017303532804615, "grad_norm": 0.1686142385005951, "learning_rate": 1.2970632003500782e-05, "loss": 0.0233, "step": 38860 }, { "epoch": 28.02451333813987, "grad_norm": 0.13355977833271027, "learning_rate": 1.294841878952856e-05, "loss": 0.022, "step": 38870 }, { "epoch": 28.031723143475126, "grad_norm": 0.13512493669986725, "learning_rate": 1.2926221783176779e-05, "loss": 0.0238, "step": 38880 }, { "epoch": 28.03893294881038, "grad_norm": 0.15848608314990997, "learning_rate": 1.290404099415512e-05, "loss": 0.0296, "step": 38890 }, { "epoch": 28.046142754145638, "grad_norm": 0.14246316254138947, "learning_rate": 1.2881876432166246e-05, "loss": 0.024, "step": 38900 }, { "epoch": 28.053352559480896, "grad_norm": 0.19631490111351013, "learning_rate": 1.2859728106905678e-05, "loss": 0.0246, "step": 38910 }, { "epoch": 28.06056236481615, "grad_norm": 0.18592135608196259, "learning_rate": 1.2837596028061832e-05, "loss": 0.0224, "step": 38920 }, { "epoch": 28.067772170151407, "grad_norm": 0.134891077876091, "learning_rate": 1.2815480205316055e-05, "loss": 0.0246, "step": 38930 }, { "epoch": 28.07498197548666, "grad_norm": 0.1526971161365509, "learning_rate": 1.279338064834255e-05, "loss": 0.0254, "step": 38940 }, { "epoch": 28.08219178082192, "grad_norm": 0.13396213948726654, "learning_rate": 1.27712973668084e-05, "loss": 0.0213, "step": 38950 }, { "epoch": 28.089401586157173, "grad_norm": 0.16501197218894958, "learning_rate": 1.2749230370373605e-05, "loss": 0.0237, "step": 38960 }, { "epoch": 28.09661139149243, "grad_norm": 0.09770768880844116, "learning_rate": 1.2727179668691009e-05, "loss": 0.0213, "step": 38970 }, { "epoch": 28.103821196827685, "grad_norm": 0.11381623148918152, "learning_rate": 1.2705145271406327e-05, "loss": 0.0268, "step": 38980 }, { "epoch": 28.111031002162942, "grad_norm": 0.11613435298204422, "learning_rate": 1.2683127188158172e-05, "loss": 0.0257, "step": 38990 }, { "epoch": 28.118240807498196, "grad_norm": 0.15468880534172058, "learning_rate": 1.2661125428577997e-05, "loss": 0.0244, "step": 39000 }, { "epoch": 28.125450612833454, "grad_norm": 0.10479304194450378, "learning_rate": 1.2639140002290101e-05, "loss": 0.0233, "step": 39010 }, { "epoch": 28.132660418168708, "grad_norm": 0.13068059086799622, "learning_rate": 1.261717091891168e-05, "loss": 0.022, "step": 39020 }, { "epoch": 28.139870223503966, "grad_norm": 0.10606788098812103, "learning_rate": 1.2595218188052755e-05, "loss": 0.0281, "step": 39030 }, { "epoch": 28.14708002883922, "grad_norm": 0.13885264098644257, "learning_rate": 1.2573281819316174e-05, "loss": 0.0284, "step": 39040 }, { "epoch": 28.154289834174477, "grad_norm": 0.14574313163757324, "learning_rate": 1.2551361822297674e-05, "loss": 0.0227, "step": 39050 }, { "epoch": 28.161499639509735, "grad_norm": 0.16037710011005402, "learning_rate": 1.2529458206585804e-05, "loss": 0.0263, "step": 39060 }, { "epoch": 28.16870944484499, "grad_norm": 0.1489790827035904, "learning_rate": 1.250757098176194e-05, "loss": 0.0264, "step": 39070 }, { "epoch": 28.175919250180247, "grad_norm": 0.1294347494840622, "learning_rate": 1.2485700157400294e-05, "loss": 0.0244, "step": 39080 }, { "epoch": 28.1831290555155, "grad_norm": 0.13059133291244507, "learning_rate": 1.246384574306792e-05, "loss": 0.0228, "step": 39090 }, { "epoch": 28.19033886085076, "grad_norm": 0.11419612914323807, "learning_rate": 1.2442007748324664e-05, "loss": 0.0195, "step": 39100 }, { "epoch": 28.197548666186012, "grad_norm": 0.13019831478595734, "learning_rate": 1.2420186182723237e-05, "loss": 0.0263, "step": 39110 }, { "epoch": 28.20475847152127, "grad_norm": 0.1701889932155609, "learning_rate": 1.23983810558091e-05, "loss": 0.0246, "step": 39120 }, { "epoch": 28.211968276856524, "grad_norm": 0.07205168157815933, "learning_rate": 1.2376592377120582e-05, "loss": 0.0236, "step": 39130 }, { "epoch": 28.21917808219178, "grad_norm": 0.13851037621498108, "learning_rate": 1.2354820156188768e-05, "loss": 0.0225, "step": 39140 }, { "epoch": 28.226387887527036, "grad_norm": 0.18654942512512207, "learning_rate": 1.2333064402537597e-05, "loss": 0.0254, "step": 39150 }, { "epoch": 28.233597692862293, "grad_norm": 0.16308559477329254, "learning_rate": 1.2311325125683754e-05, "loss": 0.0268, "step": 39160 }, { "epoch": 28.240807498197547, "grad_norm": 0.15909942984580994, "learning_rate": 1.228960233513673e-05, "loss": 0.024, "step": 39170 }, { "epoch": 28.248017303532805, "grad_norm": 0.17995662987232208, "learning_rate": 1.2267896040398836e-05, "loss": 0.0294, "step": 39180 }, { "epoch": 28.25522710886806, "grad_norm": 0.08747970312833786, "learning_rate": 1.2246206250965125e-05, "loss": 0.0216, "step": 39190 }, { "epoch": 28.262436914203317, "grad_norm": 0.1355990171432495, "learning_rate": 1.2224532976323444e-05, "loss": 0.0214, "step": 39200 }, { "epoch": 28.26964671953857, "grad_norm": 0.131618931889534, "learning_rate": 1.220287622595444e-05, "loss": 0.0251, "step": 39210 }, { "epoch": 28.27685652487383, "grad_norm": 0.16484563052654266, "learning_rate": 1.2181236009331497e-05, "loss": 0.0255, "step": 39220 }, { "epoch": 28.284066330209086, "grad_norm": 0.14809353649616241, "learning_rate": 1.2159612335920784e-05, "loss": 0.0256, "step": 39230 }, { "epoch": 28.29127613554434, "grad_norm": 0.143011212348938, "learning_rate": 1.2138005215181219e-05, "loss": 0.0276, "step": 39240 }, { "epoch": 28.298485940879598, "grad_norm": 0.23770911991596222, "learning_rate": 1.2116414656564512e-05, "loss": 0.0224, "step": 39250 }, { "epoch": 28.30569574621485, "grad_norm": 0.17621231079101562, "learning_rate": 1.2094840669515095e-05, "loss": 0.0253, "step": 39260 }, { "epoch": 28.31290555155011, "grad_norm": 0.17564795911312103, "learning_rate": 1.2073283263470152e-05, "loss": 0.0244, "step": 39270 }, { "epoch": 28.320115356885363, "grad_norm": 0.12821786105632782, "learning_rate": 1.2051742447859653e-05, "loss": 0.0219, "step": 39280 }, { "epoch": 28.32732516222062, "grad_norm": 0.14786766469478607, "learning_rate": 1.203021823210626e-05, "loss": 0.0258, "step": 39290 }, { "epoch": 28.334534967555875, "grad_norm": 0.1664513200521469, "learning_rate": 1.2008710625625397e-05, "loss": 0.0247, "step": 39300 }, { "epoch": 28.341744772891133, "grad_norm": 0.08960102498531342, "learning_rate": 1.1987219637825236e-05, "loss": 0.0243, "step": 39310 }, { "epoch": 28.348954578226387, "grad_norm": 0.14141513407230377, "learning_rate": 1.1965745278106656e-05, "loss": 0.0222, "step": 39320 }, { "epoch": 28.356164383561644, "grad_norm": 0.1347072422504425, "learning_rate": 1.1944287555863259e-05, "loss": 0.0267, "step": 39330 }, { "epoch": 28.3633741888969, "grad_norm": 0.1962129771709442, "learning_rate": 1.1922846480481409e-05, "loss": 0.0274, "step": 39340 }, { "epoch": 28.370583994232156, "grad_norm": 0.11118701845407486, "learning_rate": 1.1901422061340133e-05, "loss": 0.0256, "step": 39350 }, { "epoch": 28.37779379956741, "grad_norm": 0.13721613585948944, "learning_rate": 1.1880014307811227e-05, "loss": 0.0291, "step": 39360 }, { "epoch": 28.385003604902668, "grad_norm": 0.17186906933784485, "learning_rate": 1.1858623229259142e-05, "loss": 0.0233, "step": 39370 }, { "epoch": 28.392213410237922, "grad_norm": 0.09138678014278412, "learning_rate": 1.1837248835041092e-05, "loss": 0.0244, "step": 39380 }, { "epoch": 28.39942321557318, "grad_norm": 0.11441919207572937, "learning_rate": 1.1815891134506935e-05, "loss": 0.0221, "step": 39390 }, { "epoch": 28.406633020908437, "grad_norm": 0.12748615443706512, "learning_rate": 1.1794550136999277e-05, "loss": 0.029, "step": 39400 }, { "epoch": 28.41384282624369, "grad_norm": 0.12135352939367294, "learning_rate": 1.1773225851853387e-05, "loss": 0.0212, "step": 39410 }, { "epoch": 28.42105263157895, "grad_norm": 0.1870366334915161, "learning_rate": 1.1751918288397235e-05, "loss": 0.0244, "step": 39420 }, { "epoch": 28.428262436914203, "grad_norm": 0.13192835450172424, "learning_rate": 1.1730627455951449e-05, "loss": 0.0241, "step": 39430 }, { "epoch": 28.43547224224946, "grad_norm": 0.1608777940273285, "learning_rate": 1.17093533638294e-05, "loss": 0.0217, "step": 39440 }, { "epoch": 28.442682047584714, "grad_norm": 0.16861307621002197, "learning_rate": 1.168809602133708e-05, "loss": 0.0249, "step": 39450 }, { "epoch": 28.449891852919972, "grad_norm": 0.17508776485919952, "learning_rate": 1.1666855437773162e-05, "loss": 0.0266, "step": 39460 }, { "epoch": 28.457101658255226, "grad_norm": 0.10665786266326904, "learning_rate": 1.1645631622429026e-05, "loss": 0.0221, "step": 39470 }, { "epoch": 28.464311463590484, "grad_norm": 0.17285247147083282, "learning_rate": 1.1624424584588673e-05, "loss": 0.0238, "step": 39480 }, { "epoch": 28.471521268925738, "grad_norm": 0.194107323884964, "learning_rate": 1.1603234333528768e-05, "loss": 0.0249, "step": 39490 }, { "epoch": 28.478731074260995, "grad_norm": 0.1832561492919922, "learning_rate": 1.1582060878518674e-05, "loss": 0.0236, "step": 39500 }, { "epoch": 28.48594087959625, "grad_norm": 0.20079223811626434, "learning_rate": 1.1560904228820375e-05, "loss": 0.0282, "step": 39510 }, { "epoch": 28.493150684931507, "grad_norm": 0.13129258155822754, "learning_rate": 1.1539764393688478e-05, "loss": 0.0213, "step": 39520 }, { "epoch": 28.50036049026676, "grad_norm": 0.1355437934398651, "learning_rate": 1.151864138237031e-05, "loss": 0.0281, "step": 39530 }, { "epoch": 28.50757029560202, "grad_norm": 0.17193789780139923, "learning_rate": 1.1497535204105769e-05, "loss": 0.0269, "step": 39540 }, { "epoch": 28.514780100937276, "grad_norm": 0.11998898535966873, "learning_rate": 1.14764458681274e-05, "loss": 0.0197, "step": 39550 }, { "epoch": 28.52198990627253, "grad_norm": 0.14569073915481567, "learning_rate": 1.1455373383660434e-05, "loss": 0.0232, "step": 39560 }, { "epoch": 28.529199711607788, "grad_norm": 0.15475396811962128, "learning_rate": 1.1434317759922664e-05, "loss": 0.0253, "step": 39570 }, { "epoch": 28.536409516943042, "grad_norm": 0.17881686985492706, "learning_rate": 1.1413279006124528e-05, "loss": 0.0246, "step": 39580 }, { "epoch": 28.5436193222783, "grad_norm": 0.15494608879089355, "learning_rate": 1.1392257131469119e-05, "loss": 0.0295, "step": 39590 }, { "epoch": 28.550829127613554, "grad_norm": 0.1461700052022934, "learning_rate": 1.1371252145152095e-05, "loss": 0.0248, "step": 39600 }, { "epoch": 28.55803893294881, "grad_norm": 0.10576870292425156, "learning_rate": 1.1350264056361776e-05, "loss": 0.0259, "step": 39610 }, { "epoch": 28.565248738284065, "grad_norm": 0.20914575457572937, "learning_rate": 1.1329292874279029e-05, "loss": 0.0248, "step": 39620 }, { "epoch": 28.572458543619323, "grad_norm": 0.18314291536808014, "learning_rate": 1.1308338608077401e-05, "loss": 0.0275, "step": 39630 }, { "epoch": 28.579668348954577, "grad_norm": 0.16792061924934387, "learning_rate": 1.1287401266922981e-05, "loss": 0.0262, "step": 39640 }, { "epoch": 28.586878154289835, "grad_norm": 0.12221790850162506, "learning_rate": 1.126648085997446e-05, "loss": 0.0233, "step": 39650 }, { "epoch": 28.59408795962509, "grad_norm": 0.19384564459323883, "learning_rate": 1.124557739638316e-05, "loss": 0.0223, "step": 39660 }, { "epoch": 28.601297764960346, "grad_norm": 0.19563055038452148, "learning_rate": 1.1224690885292955e-05, "loss": 0.0257, "step": 39670 }, { "epoch": 28.6085075702956, "grad_norm": 0.13134320080280304, "learning_rate": 1.1203821335840303e-05, "loss": 0.0248, "step": 39680 }, { "epoch": 28.615717375630858, "grad_norm": 0.1592346578836441, "learning_rate": 1.1182968757154278e-05, "loss": 0.0243, "step": 39690 }, { "epoch": 28.622927180966116, "grad_norm": 0.14924775063991547, "learning_rate": 1.1162133158356492e-05, "loss": 0.0239, "step": 39700 }, { "epoch": 28.63013698630137, "grad_norm": 0.07635872066020966, "learning_rate": 1.1141314548561133e-05, "loss": 0.0226, "step": 39710 }, { "epoch": 28.637346791636627, "grad_norm": 0.11330535262823105, "learning_rate": 1.1120512936874994e-05, "loss": 0.0269, "step": 39720 }, { "epoch": 28.64455659697188, "grad_norm": 0.18474963307380676, "learning_rate": 1.109972833239739e-05, "loss": 0.0265, "step": 39730 }, { "epoch": 28.65176640230714, "grad_norm": 0.13594672083854675, "learning_rate": 1.1078960744220218e-05, "loss": 0.0263, "step": 39740 }, { "epoch": 28.658976207642393, "grad_norm": 0.14239798486232758, "learning_rate": 1.1058210181427914e-05, "loss": 0.024, "step": 39750 }, { "epoch": 28.66618601297765, "grad_norm": 0.20159867405891418, "learning_rate": 1.10374766530975e-05, "loss": 0.0234, "step": 39760 }, { "epoch": 28.673395818312905, "grad_norm": 0.19616247713565826, "learning_rate": 1.1016760168298518e-05, "loss": 0.027, "step": 39770 }, { "epoch": 28.680605623648162, "grad_norm": 0.17037604749202728, "learning_rate": 1.0996060736093044e-05, "loss": 0.0257, "step": 39780 }, { "epoch": 28.687815428983416, "grad_norm": 0.16191424429416656, "learning_rate": 1.0975378365535749e-05, "loss": 0.027, "step": 39790 }, { "epoch": 28.695025234318674, "grad_norm": 0.14658699929714203, "learning_rate": 1.0954713065673777e-05, "loss": 0.0232, "step": 39800 }, { "epoch": 28.702235039653928, "grad_norm": 0.14545805752277374, "learning_rate": 1.0934064845546832e-05, "loss": 0.0245, "step": 39810 }, { "epoch": 28.709444844989186, "grad_norm": 0.15345077216625214, "learning_rate": 1.0913433714187166e-05, "loss": 0.0237, "step": 39820 }, { "epoch": 28.71665465032444, "grad_norm": 0.18311232328414917, "learning_rate": 1.089281968061951e-05, "loss": 0.0303, "step": 39830 }, { "epoch": 28.723864455659697, "grad_norm": 0.14077137410640717, "learning_rate": 1.0872222753861172e-05, "loss": 0.0212, "step": 39840 }, { "epoch": 28.73107426099495, "grad_norm": 0.15643103420734406, "learning_rate": 1.0851642942921925e-05, "loss": 0.0258, "step": 39850 }, { "epoch": 28.73828406633021, "grad_norm": 0.17274245619773865, "learning_rate": 1.0831080256804093e-05, "loss": 0.0297, "step": 39860 }, { "epoch": 28.745493871665467, "grad_norm": 0.18789635598659515, "learning_rate": 1.0810534704502478e-05, "loss": 0.0285, "step": 39870 }, { "epoch": 28.75270367700072, "grad_norm": 0.09764766693115234, "learning_rate": 1.0790006295004423e-05, "loss": 0.0247, "step": 39880 }, { "epoch": 28.75991348233598, "grad_norm": 0.1267770230770111, "learning_rate": 1.076949503728974e-05, "loss": 0.0238, "step": 39890 }, { "epoch": 28.767123287671232, "grad_norm": 0.16679313778877258, "learning_rate": 1.074900094033074e-05, "loss": 0.0239, "step": 39900 }, { "epoch": 28.77433309300649, "grad_norm": 0.11046041548252106, "learning_rate": 1.0728524013092256e-05, "loss": 0.0214, "step": 39910 }, { "epoch": 28.781542898341744, "grad_norm": 0.11334939301013947, "learning_rate": 1.0708064264531581e-05, "loss": 0.024, "step": 39920 }, { "epoch": 28.788752703677, "grad_norm": 0.1231813132762909, "learning_rate": 1.068762170359851e-05, "loss": 0.0252, "step": 39930 }, { "epoch": 28.795962509012256, "grad_norm": 0.17085886001586914, "learning_rate": 1.0667196339235297e-05, "loss": 0.0267, "step": 39940 }, { "epoch": 28.803172314347513, "grad_norm": 0.13988013565540314, "learning_rate": 1.0646788180376716e-05, "loss": 0.0198, "step": 39950 }, { "epoch": 28.810382119682767, "grad_norm": 0.17796821892261505, "learning_rate": 1.0626397235949976e-05, "loss": 0.0284, "step": 39960 }, { "epoch": 28.817591925018025, "grad_norm": 0.17472828924655914, "learning_rate": 1.0606023514874751e-05, "loss": 0.0227, "step": 39970 }, { "epoch": 28.82480173035328, "grad_norm": 0.12979371845722198, "learning_rate": 1.0585667026063233e-05, "loss": 0.0218, "step": 39980 }, { "epoch": 28.832011535688537, "grad_norm": 0.15952672064304352, "learning_rate": 1.0565327778420025e-05, "loss": 0.0214, "step": 39990 }, { "epoch": 28.83922134102379, "grad_norm": 0.13564574718475342, "learning_rate": 1.0545005780842193e-05, "loss": 0.0219, "step": 40000 } ], "logging_steps": 10, "max_steps": 50000, "num_input_tokens_seen": 0, "num_train_epochs": 37, "save_steps": 5000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 32, "trial_name": null, "trial_params": null }