| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 13.812154696132596, |
| "eval_steps": 500, |
| "global_step": 20000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.006906077348066298, |
| "grad_norm": 2.1743838787078857, |
| "learning_rate": 9e-07, |
| "loss": 0.9948, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.013812154696132596, |
| "grad_norm": 1.8689522743225098, |
| "learning_rate": 1.9e-06, |
| "loss": 0.9971, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.020718232044198894, |
| "grad_norm": 1.4171581268310547, |
| "learning_rate": 2.9e-06, |
| "loss": 0.9942, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.027624309392265192, |
| "grad_norm": 1.5665574073791504, |
| "learning_rate": 3.9e-06, |
| "loss": 0.954, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.034530386740331494, |
| "grad_norm": 1.5501309633255005, |
| "learning_rate": 4.9000000000000005e-06, |
| "loss": 0.8629, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.04143646408839779, |
| "grad_norm": 1.1840782165527344, |
| "learning_rate": 5.9e-06, |
| "loss": 0.8254, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.04834254143646409, |
| "grad_norm": 1.3108546733856201, |
| "learning_rate": 6.900000000000001e-06, |
| "loss": 0.6742, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.055248618784530384, |
| "grad_norm": 0.9183031320571899, |
| "learning_rate": 7.9e-06, |
| "loss": 0.5774, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.062154696132596686, |
| "grad_norm": 0.5960970520973206, |
| "learning_rate": 8.9e-06, |
| "loss": 0.4576, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.06906077348066299, |
| "grad_norm": 0.5846773982048035, |
| "learning_rate": 9.900000000000002e-06, |
| "loss": 0.4257, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.07596685082872928, |
| "grad_norm": 0.43746134638786316, |
| "learning_rate": 1.09e-05, |
| "loss": 0.3877, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.08287292817679558, |
| "grad_norm": 0.3833974003791809, |
| "learning_rate": 1.19e-05, |
| "loss": 0.3288, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.08977900552486189, |
| "grad_norm": 0.4290325939655304, |
| "learning_rate": 1.29e-05, |
| "loss": 0.3196, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.09668508287292818, |
| "grad_norm": 0.4357810318470001, |
| "learning_rate": 1.3900000000000002e-05, |
| "loss": 0.3117, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.10359116022099447, |
| "grad_norm": 0.33745425939559937, |
| "learning_rate": 1.49e-05, |
| "loss": 0.2888, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.11049723756906077, |
| "grad_norm": 0.3580892086029053, |
| "learning_rate": 1.59e-05, |
| "loss": 0.2803, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.11740331491712708, |
| "grad_norm": 0.30870571732521057, |
| "learning_rate": 1.69e-05, |
| "loss": 0.2556, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.12430939226519337, |
| "grad_norm": 0.4122651219367981, |
| "learning_rate": 1.79e-05, |
| "loss": 0.253, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.13121546961325967, |
| "grad_norm": 0.46398815512657166, |
| "learning_rate": 1.8900000000000002e-05, |
| "loss": 0.2369, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.13812154696132597, |
| "grad_norm": 0.497284859418869, |
| "learning_rate": 1.9900000000000003e-05, |
| "loss": 0.2358, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.14502762430939226, |
| "grad_norm": 0.28150540590286255, |
| "learning_rate": 2.09e-05, |
| "loss": 0.2371, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.15193370165745856, |
| "grad_norm": 0.3314034938812256, |
| "learning_rate": 2.19e-05, |
| "loss": 0.2405, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.15883977900552487, |
| "grad_norm": 0.4119907021522522, |
| "learning_rate": 2.29e-05, |
| "loss": 0.225, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.16574585635359115, |
| "grad_norm": 0.5320000648498535, |
| "learning_rate": 2.39e-05, |
| "loss": 0.2165, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.17265193370165746, |
| "grad_norm": 0.7389222979545593, |
| "learning_rate": 2.4900000000000002e-05, |
| "loss": 0.2045, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.17955801104972377, |
| "grad_norm": 0.3889163136482239, |
| "learning_rate": 2.5900000000000003e-05, |
| "loss": 0.2135, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.18646408839779005, |
| "grad_norm": 0.3049943149089813, |
| "learning_rate": 2.6900000000000003e-05, |
| "loss": 0.1976, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.19337016574585636, |
| "grad_norm": 0.3817075490951538, |
| "learning_rate": 2.7900000000000004e-05, |
| "loss": 0.1823, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.20027624309392264, |
| "grad_norm": 0.605591356754303, |
| "learning_rate": 2.8899999999999998e-05, |
| "loss": 0.1997, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.20718232044198895, |
| "grad_norm": 0.36424922943115234, |
| "learning_rate": 2.9900000000000002e-05, |
| "loss": 0.18, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.21408839779005526, |
| "grad_norm": 0.3346197307109833, |
| "learning_rate": 3.09e-05, |
| "loss": 0.1868, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.22099447513812154, |
| "grad_norm": 0.559263288974762, |
| "learning_rate": 3.19e-05, |
| "loss": 0.185, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.22790055248618785, |
| "grad_norm": 0.6093797087669373, |
| "learning_rate": 3.29e-05, |
| "loss": 0.1803, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.23480662983425415, |
| "grad_norm": 0.3911588191986084, |
| "learning_rate": 3.3900000000000004e-05, |
| "loss": 0.1694, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.24171270718232044, |
| "grad_norm": 0.47210046648979187, |
| "learning_rate": 3.49e-05, |
| "loss": 0.17, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.24861878453038674, |
| "grad_norm": 0.7929732799530029, |
| "learning_rate": 3.59e-05, |
| "loss": 0.1754, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.255524861878453, |
| "grad_norm": 0.5116897225379944, |
| "learning_rate": 3.69e-05, |
| "loss": 0.1679, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.26243093922651933, |
| "grad_norm": 0.4088026285171509, |
| "learning_rate": 3.79e-05, |
| "loss": 0.1512, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.26933701657458564, |
| "grad_norm": 0.44423434138298035, |
| "learning_rate": 3.8900000000000004e-05, |
| "loss": 0.1454, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.27624309392265195, |
| "grad_norm": 0.4872801899909973, |
| "learning_rate": 3.99e-05, |
| "loss": 0.1506, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.28314917127071826, |
| "grad_norm": 0.5234099626541138, |
| "learning_rate": 4.09e-05, |
| "loss": 0.148, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.2900552486187845, |
| "grad_norm": 0.4611225724220276, |
| "learning_rate": 4.19e-05, |
| "loss": 0.1454, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.2969613259668508, |
| "grad_norm": 0.42924195528030396, |
| "learning_rate": 4.29e-05, |
| "loss": 0.1442, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.30386740331491713, |
| "grad_norm": 0.6093170642852783, |
| "learning_rate": 4.39e-05, |
| "loss": 0.1345, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.31077348066298344, |
| "grad_norm": 0.403637170791626, |
| "learning_rate": 4.49e-05, |
| "loss": 0.1411, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.31767955801104975, |
| "grad_norm": 0.48455968499183655, |
| "learning_rate": 4.5900000000000004e-05, |
| "loss": 0.1426, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.324585635359116, |
| "grad_norm": 0.8913393616676331, |
| "learning_rate": 4.69e-05, |
| "loss": 0.1481, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.3314917127071823, |
| "grad_norm": 0.48442342877388, |
| "learning_rate": 4.79e-05, |
| "loss": 0.1277, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.3383977900552486, |
| "grad_norm": 0.5443485379219055, |
| "learning_rate": 4.89e-05, |
| "loss": 0.1337, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.3453038674033149, |
| "grad_norm": 0.5175359845161438, |
| "learning_rate": 4.99e-05, |
| "loss": 0.122, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.35220994475138123, |
| "grad_norm": 0.5098366141319275, |
| "learning_rate": 5.0900000000000004e-05, |
| "loss": 0.1304, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.35911602209944754, |
| "grad_norm": 0.36380815505981445, |
| "learning_rate": 5.19e-05, |
| "loss": 0.1314, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.3660220994475138, |
| "grad_norm": 0.5042738318443298, |
| "learning_rate": 5.2900000000000005e-05, |
| "loss": 0.128, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.3729281767955801, |
| "grad_norm": 0.4525247812271118, |
| "learning_rate": 5.390000000000001e-05, |
| "loss": 0.1205, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.3798342541436464, |
| "grad_norm": 0.5141445398330688, |
| "learning_rate": 5.4900000000000006e-05, |
| "loss": 0.1265, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.3867403314917127, |
| "grad_norm": 0.4617238938808441, |
| "learning_rate": 5.590000000000001e-05, |
| "loss": 0.1219, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.393646408839779, |
| "grad_norm": 0.7641685605049133, |
| "learning_rate": 5.69e-05, |
| "loss": 0.1315, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.4005524861878453, |
| "grad_norm": 0.3960668742656708, |
| "learning_rate": 5.79e-05, |
| "loss": 0.1272, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.4074585635359116, |
| "grad_norm": 0.35828521847724915, |
| "learning_rate": 5.89e-05, |
| "loss": 0.128, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.4143646408839779, |
| "grad_norm": 0.42251715064048767, |
| "learning_rate": 5.99e-05, |
| "loss": 0.1267, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.4212707182320442, |
| "grad_norm": 0.5093334913253784, |
| "learning_rate": 6.09e-05, |
| "loss": 0.1359, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.4281767955801105, |
| "grad_norm": 0.7192143797874451, |
| "learning_rate": 6.19e-05, |
| "loss": 0.1205, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.4350828729281768, |
| "grad_norm": 0.8674402236938477, |
| "learning_rate": 6.29e-05, |
| "loss": 0.1156, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.4419889502762431, |
| "grad_norm": 0.31834760308265686, |
| "learning_rate": 6.390000000000001e-05, |
| "loss": 0.1116, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.4488950276243094, |
| "grad_norm": 0.3638267517089844, |
| "learning_rate": 6.49e-05, |
| "loss": 0.1142, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.4558011049723757, |
| "grad_norm": 0.48287007212638855, |
| "learning_rate": 6.59e-05, |
| "loss": 0.1179, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.462707182320442, |
| "grad_norm": 0.42799556255340576, |
| "learning_rate": 6.690000000000001e-05, |
| "loss": 0.1155, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.4696132596685083, |
| "grad_norm": 0.5267932415008545, |
| "learning_rate": 6.790000000000001e-05, |
| "loss": 0.1122, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.47651933701657456, |
| "grad_norm": 0.3648410737514496, |
| "learning_rate": 6.89e-05, |
| "loss": 0.1119, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.48342541436464087, |
| "grad_norm": 0.40923115611076355, |
| "learning_rate": 6.99e-05, |
| "loss": 0.1155, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.4903314917127072, |
| "grad_norm": 0.43576744198799133, |
| "learning_rate": 7.09e-05, |
| "loss": 0.1262, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.4972375690607735, |
| "grad_norm": 0.5781348347663879, |
| "learning_rate": 7.19e-05, |
| "loss": 0.1148, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.5041436464088398, |
| "grad_norm": 0.4100674092769623, |
| "learning_rate": 7.29e-05, |
| "loss": 0.1098, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.511049723756906, |
| "grad_norm": 0.3947247266769409, |
| "learning_rate": 7.390000000000001e-05, |
| "loss": 0.1022, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.5179558011049724, |
| "grad_norm": 0.437826007604599, |
| "learning_rate": 7.49e-05, |
| "loss": 0.1135, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.5248618784530387, |
| "grad_norm": 0.362348735332489, |
| "learning_rate": 7.59e-05, |
| "loss": 0.1009, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.5317679558011049, |
| "grad_norm": 0.5351320505142212, |
| "learning_rate": 7.69e-05, |
| "loss": 0.1088, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.5386740331491713, |
| "grad_norm": 0.5323932766914368, |
| "learning_rate": 7.790000000000001e-05, |
| "loss": 0.1033, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.5455801104972375, |
| "grad_norm": 0.4217228889465332, |
| "learning_rate": 7.890000000000001e-05, |
| "loss": 0.1172, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.5524861878453039, |
| "grad_norm": 0.43072474002838135, |
| "learning_rate": 7.99e-05, |
| "loss": 0.1109, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.5593922651933702, |
| "grad_norm": 0.34974709153175354, |
| "learning_rate": 8.090000000000001e-05, |
| "loss": 0.1181, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.5662983425414365, |
| "grad_norm": 0.35198482871055603, |
| "learning_rate": 8.19e-05, |
| "loss": 0.1067, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.5732044198895028, |
| "grad_norm": 0.36193737387657166, |
| "learning_rate": 8.29e-05, |
| "loss": 0.1066, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.580110497237569, |
| "grad_norm": 0.5137208104133606, |
| "learning_rate": 8.39e-05, |
| "loss": 0.1008, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.5870165745856354, |
| "grad_norm": 0.4571826457977295, |
| "learning_rate": 8.49e-05, |
| "loss": 0.1187, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.5939226519337016, |
| "grad_norm": 0.4294939339160919, |
| "learning_rate": 8.59e-05, |
| "loss": 0.1051, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.600828729281768, |
| "grad_norm": 0.5359936952590942, |
| "learning_rate": 8.69e-05, |
| "loss": 0.1094, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.6077348066298343, |
| "grad_norm": 0.31322285532951355, |
| "learning_rate": 8.790000000000001e-05, |
| "loss": 0.1003, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.6146408839779005, |
| "grad_norm": 0.27965644001960754, |
| "learning_rate": 8.89e-05, |
| "loss": 0.0963, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.6215469613259669, |
| "grad_norm": 0.2999873757362366, |
| "learning_rate": 8.99e-05, |
| "loss": 0.1014, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.6284530386740331, |
| "grad_norm": 0.47803860902786255, |
| "learning_rate": 9.090000000000001e-05, |
| "loss": 0.0926, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.6353591160220995, |
| "grad_norm": 0.44106921553611755, |
| "learning_rate": 9.190000000000001e-05, |
| "loss": 0.1029, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.6422651933701657, |
| "grad_norm": 0.3900652229785919, |
| "learning_rate": 9.290000000000001e-05, |
| "loss": 0.0997, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.649171270718232, |
| "grad_norm": 0.4396621286869049, |
| "learning_rate": 9.39e-05, |
| "loss": 0.1, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.6560773480662984, |
| "grad_norm": 0.43293172121047974, |
| "learning_rate": 9.49e-05, |
| "loss": 0.1092, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.6629834254143646, |
| "grad_norm": 0.4162095785140991, |
| "learning_rate": 9.59e-05, |
| "loss": 0.0911, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.669889502762431, |
| "grad_norm": 0.385890930891037, |
| "learning_rate": 9.69e-05, |
| "loss": 0.0984, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.6767955801104972, |
| "grad_norm": 0.36000925302505493, |
| "learning_rate": 9.790000000000001e-05, |
| "loss": 0.0941, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.6837016574585635, |
| "grad_norm": 0.28026294708251953, |
| "learning_rate": 9.89e-05, |
| "loss": 0.1002, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.6906077348066298, |
| "grad_norm": 0.3528141379356384, |
| "learning_rate": 9.99e-05, |
| "loss": 0.0872, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.6975138121546961, |
| "grad_norm": 0.500379741191864, |
| "learning_rate": 9.999994463727085e-05, |
| "loss": 0.0938, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.7044198895027625, |
| "grad_norm": 0.42596903443336487, |
| "learning_rate": 9.999975326009292e-05, |
| "loss": 0.0959, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.7113259668508287, |
| "grad_norm": 0.2438584268093109, |
| "learning_rate": 9.999942518549879e-05, |
| "loss": 0.0963, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.7182320441988951, |
| "grad_norm": 0.3988669514656067, |
| "learning_rate": 9.999896041438544e-05, |
| "loss": 0.0932, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.7251381215469613, |
| "grad_norm": 0.28523698449134827, |
| "learning_rate": 9.999835894802353e-05, |
| "loss": 0.0888, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.7320441988950276, |
| "grad_norm": 0.511328399181366, |
| "learning_rate": 9.999762078805743e-05, |
| "loss": 0.104, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.738950276243094, |
| "grad_norm": 0.2832479476928711, |
| "learning_rate": 9.999674593650526e-05, |
| "loss": 0.0905, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.7458563535911602, |
| "grad_norm": 0.32691365480422974, |
| "learning_rate": 9.99957343957588e-05, |
| "loss": 0.0978, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.7527624309392266, |
| "grad_norm": 0.349331259727478, |
| "learning_rate": 9.99945861685836e-05, |
| "loss": 0.0911, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.7596685082872928, |
| "grad_norm": 0.4922757148742676, |
| "learning_rate": 9.999330125811884e-05, |
| "loss": 0.0881, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.7665745856353591, |
| "grad_norm": 0.26750776171684265, |
| "learning_rate": 9.999187966787744e-05, |
| "loss": 0.0856, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.7734806629834254, |
| "grad_norm": 0.298186719417572, |
| "learning_rate": 9.999032140174595e-05, |
| "loss": 0.0875, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.7803867403314917, |
| "grad_norm": 0.3135933578014374, |
| "learning_rate": 9.998862646398464e-05, |
| "loss": 0.0935, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.787292817679558, |
| "grad_norm": 0.37238550186157227, |
| "learning_rate": 9.998679485922739e-05, |
| "loss": 0.0975, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.7941988950276243, |
| "grad_norm": 0.3704855442047119, |
| "learning_rate": 9.998482659248174e-05, |
| "loss": 0.0886, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.8011049723756906, |
| "grad_norm": 0.3354080617427826, |
| "learning_rate": 9.998272166912883e-05, |
| "loss": 0.0923, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.8080110497237569, |
| "grad_norm": 0.2832203209400177, |
| "learning_rate": 9.998048009492347e-05, |
| "loss": 0.0905, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.8149171270718232, |
| "grad_norm": 0.3106226921081543, |
| "learning_rate": 9.997810187599403e-05, |
| "loss": 0.0874, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.8218232044198895, |
| "grad_norm": 0.26605871319770813, |
| "learning_rate": 9.997558701884249e-05, |
| "loss": 0.0883, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.8287292817679558, |
| "grad_norm": 0.23787857592105865, |
| "learning_rate": 9.997293553034433e-05, |
| "loss": 0.0851, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.835635359116022, |
| "grad_norm": 0.2726190686225891, |
| "learning_rate": 9.997014741774866e-05, |
| "loss": 0.0912, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.8425414364640884, |
| "grad_norm": 0.3378348648548126, |
| "learning_rate": 9.996722268867803e-05, |
| "loss": 0.0837, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.8494475138121547, |
| "grad_norm": 0.2348347008228302, |
| "learning_rate": 9.996416135112858e-05, |
| "loss": 0.095, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.856353591160221, |
| "grad_norm": 0.29881569743156433, |
| "learning_rate": 9.996096341346988e-05, |
| "loss": 0.0807, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.8632596685082873, |
| "grad_norm": 0.278564989566803, |
| "learning_rate": 9.995762888444495e-05, |
| "loss": 0.0941, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.8701657458563536, |
| "grad_norm": 0.26641955971717834, |
| "learning_rate": 9.995415777317027e-05, |
| "loss": 0.0813, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.8770718232044199, |
| "grad_norm": 0.2798615097999573, |
| "learning_rate": 9.995055008913574e-05, |
| "loss": 0.0885, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.8839779005524862, |
| "grad_norm": 0.35161152482032776, |
| "learning_rate": 9.994680584220463e-05, |
| "loss": 0.0929, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.8908839779005525, |
| "grad_norm": 0.3145495653152466, |
| "learning_rate": 9.994292504261355e-05, |
| "loss": 0.0903, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.8977900552486188, |
| "grad_norm": 0.2972181439399719, |
| "learning_rate": 9.993890770097247e-05, |
| "loss": 0.0806, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.9046961325966851, |
| "grad_norm": 0.45706817507743835, |
| "learning_rate": 9.993475382826467e-05, |
| "loss": 0.088, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.9116022099447514, |
| "grad_norm": 0.35890793800354004, |
| "learning_rate": 9.993046343584664e-05, |
| "loss": 0.0869, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.9185082872928176, |
| "grad_norm": 0.3728267252445221, |
| "learning_rate": 9.992603653544816e-05, |
| "loss": 0.077, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.925414364640884, |
| "grad_norm": 0.3744541108608246, |
| "learning_rate": 9.992147313917222e-05, |
| "loss": 0.0818, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.9323204419889503, |
| "grad_norm": 0.3192994296550751, |
| "learning_rate": 9.991677325949497e-05, |
| "loss": 0.0793, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.9392265193370166, |
| "grad_norm": 0.31171876192092896, |
| "learning_rate": 9.991193690926568e-05, |
| "loss": 0.0832, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.9461325966850829, |
| "grad_norm": 0.24060353636741638, |
| "learning_rate": 9.990696410170678e-05, |
| "loss": 0.0721, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.9530386740331491, |
| "grad_norm": 0.2516084909439087, |
| "learning_rate": 9.990185485041371e-05, |
| "loss": 0.0782, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.9599447513812155, |
| "grad_norm": 0.2999783158302307, |
| "learning_rate": 9.989660916935498e-05, |
| "loss": 0.0724, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.9668508287292817, |
| "grad_norm": 0.3542238473892212, |
| "learning_rate": 9.989122707287208e-05, |
| "loss": 0.0844, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.9737569060773481, |
| "grad_norm": 0.3433320224285126, |
| "learning_rate": 9.988570857567945e-05, |
| "loss": 0.0772, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.9806629834254144, |
| "grad_norm": 0.3041175603866577, |
| "learning_rate": 9.988005369286446e-05, |
| "loss": 0.0891, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.9875690607734806, |
| "grad_norm": 0.2909347712993622, |
| "learning_rate": 9.987426243988734e-05, |
| "loss": 0.079, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.994475138121547, |
| "grad_norm": 0.4544817805290222, |
| "learning_rate": 9.986833483258114e-05, |
| "loss": 0.0838, |
| "step": 1440 |
| }, |
| { |
| "epoch": 1.0013812154696133, |
| "grad_norm": 0.2458307147026062, |
| "learning_rate": 9.986227088715173e-05, |
| "loss": 0.0803, |
| "step": 1450 |
| }, |
| { |
| "epoch": 1.0082872928176796, |
| "grad_norm": 0.4468945562839508, |
| "learning_rate": 9.98560706201777e-05, |
| "loss": 0.0841, |
| "step": 1460 |
| }, |
| { |
| "epoch": 1.0151933701657458, |
| "grad_norm": 0.3525172472000122, |
| "learning_rate": 9.984973404861036e-05, |
| "loss": 0.0851, |
| "step": 1470 |
| }, |
| { |
| "epoch": 1.022099447513812, |
| "grad_norm": 0.45504671335220337, |
| "learning_rate": 9.984326118977361e-05, |
| "loss": 0.0823, |
| "step": 1480 |
| }, |
| { |
| "epoch": 1.0290055248618784, |
| "grad_norm": 0.28854405879974365, |
| "learning_rate": 9.983665206136406e-05, |
| "loss": 0.0798, |
| "step": 1490 |
| }, |
| { |
| "epoch": 1.0359116022099448, |
| "grad_norm": 0.317660927772522, |
| "learning_rate": 9.982990668145075e-05, |
| "loss": 0.0849, |
| "step": 1500 |
| }, |
| { |
| "epoch": 1.042817679558011, |
| "grad_norm": 0.37529265880584717, |
| "learning_rate": 9.982302506847534e-05, |
| "loss": 0.0771, |
| "step": 1510 |
| }, |
| { |
| "epoch": 1.0497237569060773, |
| "grad_norm": 0.34826648235321045, |
| "learning_rate": 9.981600724125189e-05, |
| "loss": 0.0779, |
| "step": 1520 |
| }, |
| { |
| "epoch": 1.0566298342541436, |
| "grad_norm": 0.2414906919002533, |
| "learning_rate": 9.980885321896685e-05, |
| "loss": 0.079, |
| "step": 1530 |
| }, |
| { |
| "epoch": 1.06353591160221, |
| "grad_norm": 0.23677992820739746, |
| "learning_rate": 9.980156302117905e-05, |
| "loss": 0.0781, |
| "step": 1540 |
| }, |
| { |
| "epoch": 1.0704419889502763, |
| "grad_norm": 0.3170182704925537, |
| "learning_rate": 9.979413666781963e-05, |
| "loss": 0.0767, |
| "step": 1550 |
| }, |
| { |
| "epoch": 1.0773480662983426, |
| "grad_norm": 0.18258549273014069, |
| "learning_rate": 9.978657417919193e-05, |
| "loss": 0.0747, |
| "step": 1560 |
| }, |
| { |
| "epoch": 1.0842541436464088, |
| "grad_norm": 0.2986181378364563, |
| "learning_rate": 9.977887557597153e-05, |
| "loss": 0.0688, |
| "step": 1570 |
| }, |
| { |
| "epoch": 1.091160220994475, |
| "grad_norm": 0.3077967166900635, |
| "learning_rate": 9.97710408792061e-05, |
| "loss": 0.0732, |
| "step": 1580 |
| }, |
| { |
| "epoch": 1.0980662983425415, |
| "grad_norm": 0.2392505258321762, |
| "learning_rate": 9.976307011031542e-05, |
| "loss": 0.0741, |
| "step": 1590 |
| }, |
| { |
| "epoch": 1.1049723756906078, |
| "grad_norm": 0.40254053473472595, |
| "learning_rate": 9.975496329109126e-05, |
| "loss": 0.0773, |
| "step": 1600 |
| }, |
| { |
| "epoch": 1.111878453038674, |
| "grad_norm": 0.2991298735141754, |
| "learning_rate": 9.974672044369732e-05, |
| "loss": 0.0673, |
| "step": 1610 |
| }, |
| { |
| "epoch": 1.1187845303867403, |
| "grad_norm": 0.2567093074321747, |
| "learning_rate": 9.97383415906693e-05, |
| "loss": 0.0661, |
| "step": 1620 |
| }, |
| { |
| "epoch": 1.1256906077348066, |
| "grad_norm": 0.250507652759552, |
| "learning_rate": 9.97298267549146e-05, |
| "loss": 0.0696, |
| "step": 1630 |
| }, |
| { |
| "epoch": 1.132596685082873, |
| "grad_norm": 0.35195478796958923, |
| "learning_rate": 9.972117595971249e-05, |
| "loss": 0.0754, |
| "step": 1640 |
| }, |
| { |
| "epoch": 1.1395027624309393, |
| "grad_norm": 0.27800172567367554, |
| "learning_rate": 9.971238922871391e-05, |
| "loss": 0.0821, |
| "step": 1650 |
| }, |
| { |
| "epoch": 1.1464088397790055, |
| "grad_norm": 0.17888322472572327, |
| "learning_rate": 9.970346658594142e-05, |
| "loss": 0.0699, |
| "step": 1660 |
| }, |
| { |
| "epoch": 1.1533149171270718, |
| "grad_norm": 0.19279469549655914, |
| "learning_rate": 9.969440805578923e-05, |
| "loss": 0.0712, |
| "step": 1670 |
| }, |
| { |
| "epoch": 1.160220994475138, |
| "grad_norm": 0.3412877917289734, |
| "learning_rate": 9.968521366302298e-05, |
| "loss": 0.0764, |
| "step": 1680 |
| }, |
| { |
| "epoch": 1.1671270718232045, |
| "grad_norm": 0.3297848403453827, |
| "learning_rate": 9.967588343277981e-05, |
| "loss": 0.0758, |
| "step": 1690 |
| }, |
| { |
| "epoch": 1.1740331491712708, |
| "grad_norm": 0.3466518521308899, |
| "learning_rate": 9.966641739056818e-05, |
| "loss": 0.0765, |
| "step": 1700 |
| }, |
| { |
| "epoch": 1.180939226519337, |
| "grad_norm": 0.2665199637413025, |
| "learning_rate": 9.965681556226793e-05, |
| "loss": 0.076, |
| "step": 1710 |
| }, |
| { |
| "epoch": 1.1878453038674033, |
| "grad_norm": 0.28554102778434753, |
| "learning_rate": 9.964707797413006e-05, |
| "loss": 0.078, |
| "step": 1720 |
| }, |
| { |
| "epoch": 1.1947513812154695, |
| "grad_norm": 0.256296843290329, |
| "learning_rate": 9.963720465277679e-05, |
| "loss": 0.0744, |
| "step": 1730 |
| }, |
| { |
| "epoch": 1.201657458563536, |
| "grad_norm": 0.42945459485054016, |
| "learning_rate": 9.96271956252014e-05, |
| "loss": 0.0759, |
| "step": 1740 |
| }, |
| { |
| "epoch": 1.2085635359116023, |
| "grad_norm": 0.29390648007392883, |
| "learning_rate": 9.961705091876816e-05, |
| "loss": 0.0736, |
| "step": 1750 |
| }, |
| { |
| "epoch": 1.2154696132596685, |
| "grad_norm": 0.1821282058954239, |
| "learning_rate": 9.960677056121235e-05, |
| "loss": 0.0725, |
| "step": 1760 |
| }, |
| { |
| "epoch": 1.2223756906077348, |
| "grad_norm": 0.29151904582977295, |
| "learning_rate": 9.959635458064005e-05, |
| "loss": 0.0715, |
| "step": 1770 |
| }, |
| { |
| "epoch": 1.229281767955801, |
| "grad_norm": 0.1900283247232437, |
| "learning_rate": 9.958580300552815e-05, |
| "loss": 0.0696, |
| "step": 1780 |
| }, |
| { |
| "epoch": 1.2361878453038675, |
| "grad_norm": 0.19765739142894745, |
| "learning_rate": 9.957511586472426e-05, |
| "loss": 0.0755, |
| "step": 1790 |
| }, |
| { |
| "epoch": 1.2430939226519337, |
| "grad_norm": 0.3573096990585327, |
| "learning_rate": 9.956429318744662e-05, |
| "loss": 0.0754, |
| "step": 1800 |
| }, |
| { |
| "epoch": 1.25, |
| "grad_norm": 0.36740875244140625, |
| "learning_rate": 9.955333500328404e-05, |
| "loss": 0.0706, |
| "step": 1810 |
| }, |
| { |
| "epoch": 1.2569060773480663, |
| "grad_norm": 0.5108208060264587, |
| "learning_rate": 9.95422413421957e-05, |
| "loss": 0.0708, |
| "step": 1820 |
| }, |
| { |
| "epoch": 1.2638121546961325, |
| "grad_norm": 0.3641519546508789, |
| "learning_rate": 9.953101223451133e-05, |
| "loss": 0.0709, |
| "step": 1830 |
| }, |
| { |
| "epoch": 1.270718232044199, |
| "grad_norm": 0.27009454369544983, |
| "learning_rate": 9.951964771093085e-05, |
| "loss": 0.0702, |
| "step": 1840 |
| }, |
| { |
| "epoch": 1.2776243093922652, |
| "grad_norm": 0.2130231261253357, |
| "learning_rate": 9.950814780252442e-05, |
| "loss": 0.0639, |
| "step": 1850 |
| }, |
| { |
| "epoch": 1.2845303867403315, |
| "grad_norm": 0.286380797624588, |
| "learning_rate": 9.949651254073236e-05, |
| "loss": 0.0764, |
| "step": 1860 |
| }, |
| { |
| "epoch": 1.2914364640883977, |
| "grad_norm": 0.309061735868454, |
| "learning_rate": 9.948474195736504e-05, |
| "loss": 0.0689, |
| "step": 1870 |
| }, |
| { |
| "epoch": 1.298342541436464, |
| "grad_norm": 0.3645918667316437, |
| "learning_rate": 9.947283608460277e-05, |
| "loss": 0.0716, |
| "step": 1880 |
| }, |
| { |
| "epoch": 1.3052486187845305, |
| "grad_norm": 0.30466482043266296, |
| "learning_rate": 9.946079495499577e-05, |
| "loss": 0.0658, |
| "step": 1890 |
| }, |
| { |
| "epoch": 1.3121546961325967, |
| "grad_norm": 0.24554474651813507, |
| "learning_rate": 9.944861860146401e-05, |
| "loss": 0.0723, |
| "step": 1900 |
| }, |
| { |
| "epoch": 1.319060773480663, |
| "grad_norm": 0.22281047701835632, |
| "learning_rate": 9.943630705729719e-05, |
| "loss": 0.0743, |
| "step": 1910 |
| }, |
| { |
| "epoch": 1.3259668508287292, |
| "grad_norm": 0.22427059710025787, |
| "learning_rate": 9.942386035615459e-05, |
| "loss": 0.0688, |
| "step": 1920 |
| }, |
| { |
| "epoch": 1.3328729281767955, |
| "grad_norm": 0.3036755621433258, |
| "learning_rate": 9.941127853206503e-05, |
| "loss": 0.0707, |
| "step": 1930 |
| }, |
| { |
| "epoch": 1.339779005524862, |
| "grad_norm": 0.23490217328071594, |
| "learning_rate": 9.939856161942673e-05, |
| "loss": 0.0701, |
| "step": 1940 |
| }, |
| { |
| "epoch": 1.3466850828729282, |
| "grad_norm": 0.2540903389453888, |
| "learning_rate": 9.938570965300724e-05, |
| "loss": 0.0668, |
| "step": 1950 |
| }, |
| { |
| "epoch": 1.3535911602209945, |
| "grad_norm": 0.351299911737442, |
| "learning_rate": 9.937272266794335e-05, |
| "loss": 0.0603, |
| "step": 1960 |
| }, |
| { |
| "epoch": 1.3604972375690607, |
| "grad_norm": 0.30575865507125854, |
| "learning_rate": 9.935960069974096e-05, |
| "loss": 0.0735, |
| "step": 1970 |
| }, |
| { |
| "epoch": 1.367403314917127, |
| "grad_norm": 0.2758401930332184, |
| "learning_rate": 9.934634378427506e-05, |
| "loss": 0.0723, |
| "step": 1980 |
| }, |
| { |
| "epoch": 1.3743093922651934, |
| "grad_norm": 0.42013078927993774, |
| "learning_rate": 9.933295195778954e-05, |
| "loss": 0.0671, |
| "step": 1990 |
| }, |
| { |
| "epoch": 1.3812154696132597, |
| "grad_norm": 0.30824539065361023, |
| "learning_rate": 9.931942525689715e-05, |
| "loss": 0.0636, |
| "step": 2000 |
| }, |
| { |
| "epoch": 1.388121546961326, |
| "grad_norm": 0.2764925956726074, |
| "learning_rate": 9.930576371857936e-05, |
| "loss": 0.0655, |
| "step": 2010 |
| }, |
| { |
| "epoch": 1.3950276243093922, |
| "grad_norm": 0.25183069705963135, |
| "learning_rate": 9.929196738018629e-05, |
| "loss": 0.0655, |
| "step": 2020 |
| }, |
| { |
| "epoch": 1.4019337016574585, |
| "grad_norm": 0.18897801637649536, |
| "learning_rate": 9.927803627943662e-05, |
| "loss": 0.068, |
| "step": 2030 |
| }, |
| { |
| "epoch": 1.408839779005525, |
| "grad_norm": 0.4160577356815338, |
| "learning_rate": 9.926397045441744e-05, |
| "loss": 0.0731, |
| "step": 2040 |
| }, |
| { |
| "epoch": 1.4157458563535912, |
| "grad_norm": 0.25782227516174316, |
| "learning_rate": 9.924976994358417e-05, |
| "loss": 0.065, |
| "step": 2050 |
| }, |
| { |
| "epoch": 1.4226519337016574, |
| "grad_norm": 0.42086145281791687, |
| "learning_rate": 9.923543478576048e-05, |
| "loss": 0.0637, |
| "step": 2060 |
| }, |
| { |
| "epoch": 1.4295580110497237, |
| "grad_norm": 0.25658249855041504, |
| "learning_rate": 9.922096502013813e-05, |
| "loss": 0.0609, |
| "step": 2070 |
| }, |
| { |
| "epoch": 1.43646408839779, |
| "grad_norm": 0.31206923723220825, |
| "learning_rate": 9.92063606862769e-05, |
| "loss": 0.0671, |
| "step": 2080 |
| }, |
| { |
| "epoch": 1.4433701657458564, |
| "grad_norm": 0.2223379760980606, |
| "learning_rate": 9.919162182410453e-05, |
| "loss": 0.0604, |
| "step": 2090 |
| }, |
| { |
| "epoch": 1.4502762430939227, |
| "grad_norm": 0.3089164197444916, |
| "learning_rate": 9.917674847391645e-05, |
| "loss": 0.0647, |
| "step": 2100 |
| }, |
| { |
| "epoch": 1.457182320441989, |
| "grad_norm": 0.28447818756103516, |
| "learning_rate": 9.916174067637584e-05, |
| "loss": 0.0625, |
| "step": 2110 |
| }, |
| { |
| "epoch": 1.4640883977900552, |
| "grad_norm": 0.3222653567790985, |
| "learning_rate": 9.914659847251348e-05, |
| "loss": 0.0592, |
| "step": 2120 |
| }, |
| { |
| "epoch": 1.4709944751381214, |
| "grad_norm": 0.17972660064697266, |
| "learning_rate": 9.913132190372753e-05, |
| "loss": 0.0671, |
| "step": 2130 |
| }, |
| { |
| "epoch": 1.477900552486188, |
| "grad_norm": 0.3882119357585907, |
| "learning_rate": 9.911591101178359e-05, |
| "loss": 0.0633, |
| "step": 2140 |
| }, |
| { |
| "epoch": 1.4848066298342542, |
| "grad_norm": 0.2396843284368515, |
| "learning_rate": 9.910036583881443e-05, |
| "loss": 0.0699, |
| "step": 2150 |
| }, |
| { |
| "epoch": 1.4917127071823204, |
| "grad_norm": 0.2941199839115143, |
| "learning_rate": 9.908468642731995e-05, |
| "loss": 0.0696, |
| "step": 2160 |
| }, |
| { |
| "epoch": 1.4986187845303867, |
| "grad_norm": 0.28291407227516174, |
| "learning_rate": 9.906887282016707e-05, |
| "loss": 0.0654, |
| "step": 2170 |
| }, |
| { |
| "epoch": 1.505524861878453, |
| "grad_norm": 0.21583221852779388, |
| "learning_rate": 9.90529250605896e-05, |
| "loss": 0.0638, |
| "step": 2180 |
| }, |
| { |
| "epoch": 1.5124309392265194, |
| "grad_norm": 0.20484983921051025, |
| "learning_rate": 9.903684319218809e-05, |
| "loss": 0.0663, |
| "step": 2190 |
| }, |
| { |
| "epoch": 1.5193370165745856, |
| "grad_norm": 0.3546452820301056, |
| "learning_rate": 9.902062725892976e-05, |
| "loss": 0.065, |
| "step": 2200 |
| }, |
| { |
| "epoch": 1.526243093922652, |
| "grad_norm": 0.31812551617622375, |
| "learning_rate": 9.900427730514834e-05, |
| "loss": 0.058, |
| "step": 2210 |
| }, |
| { |
| "epoch": 1.5331491712707184, |
| "grad_norm": 0.24564820528030396, |
| "learning_rate": 9.8987793375544e-05, |
| "loss": 0.0656, |
| "step": 2220 |
| }, |
| { |
| "epoch": 1.5400552486187844, |
| "grad_norm": 0.3509599566459656, |
| "learning_rate": 9.897117551518318e-05, |
| "loss": 0.0667, |
| "step": 2230 |
| }, |
| { |
| "epoch": 1.5469613259668509, |
| "grad_norm": 0.1958969682455063, |
| "learning_rate": 9.895442376949844e-05, |
| "loss": 0.0627, |
| "step": 2240 |
| }, |
| { |
| "epoch": 1.5538674033149171, |
| "grad_norm": 0.2799903154373169, |
| "learning_rate": 9.893753818428845e-05, |
| "loss": 0.0649, |
| "step": 2250 |
| }, |
| { |
| "epoch": 1.5607734806629834, |
| "grad_norm": 0.27472683787345886, |
| "learning_rate": 9.892051880571773e-05, |
| "loss": 0.0624, |
| "step": 2260 |
| }, |
| { |
| "epoch": 1.5676795580110499, |
| "grad_norm": 0.3503767251968384, |
| "learning_rate": 9.890336568031663e-05, |
| "loss": 0.0618, |
| "step": 2270 |
| }, |
| { |
| "epoch": 1.5745856353591159, |
| "grad_norm": 0.25655266642570496, |
| "learning_rate": 9.888607885498113e-05, |
| "loss": 0.0654, |
| "step": 2280 |
| }, |
| { |
| "epoch": 1.5814917127071824, |
| "grad_norm": 0.19729627668857574, |
| "learning_rate": 9.886865837697275e-05, |
| "loss": 0.0592, |
| "step": 2290 |
| }, |
| { |
| "epoch": 1.5883977900552486, |
| "grad_norm": 0.46757638454437256, |
| "learning_rate": 9.88511042939184e-05, |
| "loss": 0.0591, |
| "step": 2300 |
| }, |
| { |
| "epoch": 1.5953038674033149, |
| "grad_norm": 0.3462216556072235, |
| "learning_rate": 9.883341665381028e-05, |
| "loss": 0.0654, |
| "step": 2310 |
| }, |
| { |
| "epoch": 1.6022099447513813, |
| "grad_norm": 0.2920167148113251, |
| "learning_rate": 9.881559550500575e-05, |
| "loss": 0.0608, |
| "step": 2320 |
| }, |
| { |
| "epoch": 1.6091160220994474, |
| "grad_norm": 0.22915363311767578, |
| "learning_rate": 9.879764089622712e-05, |
| "loss": 0.0705, |
| "step": 2330 |
| }, |
| { |
| "epoch": 1.6160220994475138, |
| "grad_norm": 0.28002291917800903, |
| "learning_rate": 9.87795528765616e-05, |
| "loss": 0.0621, |
| "step": 2340 |
| }, |
| { |
| "epoch": 1.62292817679558, |
| "grad_norm": 0.24930529296398163, |
| "learning_rate": 9.876133149546118e-05, |
| "loss": 0.0689, |
| "step": 2350 |
| }, |
| { |
| "epoch": 1.6298342541436464, |
| "grad_norm": 0.33754250407218933, |
| "learning_rate": 9.874297680274238e-05, |
| "loss": 0.0596, |
| "step": 2360 |
| }, |
| { |
| "epoch": 1.6367403314917128, |
| "grad_norm": 0.3058784306049347, |
| "learning_rate": 9.872448884858624e-05, |
| "loss": 0.0614, |
| "step": 2370 |
| }, |
| { |
| "epoch": 1.6436464088397789, |
| "grad_norm": 0.2852232754230499, |
| "learning_rate": 9.870586768353815e-05, |
| "loss": 0.0549, |
| "step": 2380 |
| }, |
| { |
| "epoch": 1.6505524861878453, |
| "grad_norm": 0.28618133068084717, |
| "learning_rate": 9.868711335850764e-05, |
| "loss": 0.0609, |
| "step": 2390 |
| }, |
| { |
| "epoch": 1.6574585635359116, |
| "grad_norm": 0.19881612062454224, |
| "learning_rate": 9.866822592476833e-05, |
| "loss": 0.0635, |
| "step": 2400 |
| }, |
| { |
| "epoch": 1.6643646408839778, |
| "grad_norm": 0.2809147834777832, |
| "learning_rate": 9.86492054339577e-05, |
| "loss": 0.058, |
| "step": 2410 |
| }, |
| { |
| "epoch": 1.6712707182320443, |
| "grad_norm": 0.2217167168855667, |
| "learning_rate": 9.863005193807711e-05, |
| "loss": 0.061, |
| "step": 2420 |
| }, |
| { |
| "epoch": 1.6781767955801103, |
| "grad_norm": 0.24295663833618164, |
| "learning_rate": 9.861076548949143e-05, |
| "loss": 0.0606, |
| "step": 2430 |
| }, |
| { |
| "epoch": 1.6850828729281768, |
| "grad_norm": 0.3120078146457672, |
| "learning_rate": 9.859134614092912e-05, |
| "loss": 0.063, |
| "step": 2440 |
| }, |
| { |
| "epoch": 1.691988950276243, |
| "grad_norm": 0.2926420271396637, |
| "learning_rate": 9.857179394548191e-05, |
| "loss": 0.0628, |
| "step": 2450 |
| }, |
| { |
| "epoch": 1.6988950276243093, |
| "grad_norm": 0.2582651376724243, |
| "learning_rate": 9.855210895660477e-05, |
| "loss": 0.0615, |
| "step": 2460 |
| }, |
| { |
| "epoch": 1.7058011049723758, |
| "grad_norm": 0.18011657893657684, |
| "learning_rate": 9.853229122811568e-05, |
| "loss": 0.0558, |
| "step": 2470 |
| }, |
| { |
| "epoch": 1.7127071823204418, |
| "grad_norm": 0.29458683729171753, |
| "learning_rate": 9.851234081419559e-05, |
| "loss": 0.059, |
| "step": 2480 |
| }, |
| { |
| "epoch": 1.7196132596685083, |
| "grad_norm": 0.16498754918575287, |
| "learning_rate": 9.849225776938814e-05, |
| "loss": 0.053, |
| "step": 2490 |
| }, |
| { |
| "epoch": 1.7265193370165746, |
| "grad_norm": 0.31275665760040283, |
| "learning_rate": 9.847204214859964e-05, |
| "loss": 0.0613, |
| "step": 2500 |
| }, |
| { |
| "epoch": 1.7334254143646408, |
| "grad_norm": 0.18819302320480347, |
| "learning_rate": 9.845169400709879e-05, |
| "loss": 0.0597, |
| "step": 2510 |
| }, |
| { |
| "epoch": 1.7403314917127073, |
| "grad_norm": 0.278097003698349, |
| "learning_rate": 9.843121340051664e-05, |
| "loss": 0.0525, |
| "step": 2520 |
| }, |
| { |
| "epoch": 1.7472375690607733, |
| "grad_norm": 0.28996750712394714, |
| "learning_rate": 9.841060038484641e-05, |
| "loss": 0.0574, |
| "step": 2530 |
| }, |
| { |
| "epoch": 1.7541436464088398, |
| "grad_norm": 0.2054147720336914, |
| "learning_rate": 9.838985501644328e-05, |
| "loss": 0.0566, |
| "step": 2540 |
| }, |
| { |
| "epoch": 1.761049723756906, |
| "grad_norm": 0.23275215923786163, |
| "learning_rate": 9.83689773520243e-05, |
| "loss": 0.0687, |
| "step": 2550 |
| }, |
| { |
| "epoch": 1.7679558011049723, |
| "grad_norm": 0.30844447016716003, |
| "learning_rate": 9.834796744866819e-05, |
| "loss": 0.0573, |
| "step": 2560 |
| }, |
| { |
| "epoch": 1.7748618784530388, |
| "grad_norm": 0.22533856332302094, |
| "learning_rate": 9.832682536381525e-05, |
| "loss": 0.0612, |
| "step": 2570 |
| }, |
| { |
| "epoch": 1.7817679558011048, |
| "grad_norm": 0.2721378803253174, |
| "learning_rate": 9.830555115526711e-05, |
| "loss": 0.055, |
| "step": 2580 |
| }, |
| { |
| "epoch": 1.7886740331491713, |
| "grad_norm": 0.2064887285232544, |
| "learning_rate": 9.828414488118667e-05, |
| "loss": 0.0601, |
| "step": 2590 |
| }, |
| { |
| "epoch": 1.7955801104972375, |
| "grad_norm": 0.28619638085365295, |
| "learning_rate": 9.826260660009785e-05, |
| "loss": 0.0589, |
| "step": 2600 |
| }, |
| { |
| "epoch": 1.8024861878453038, |
| "grad_norm": 0.14919589459896088, |
| "learning_rate": 9.824093637088547e-05, |
| "loss": 0.0538, |
| "step": 2610 |
| }, |
| { |
| "epoch": 1.8093922651933703, |
| "grad_norm": 0.28396356105804443, |
| "learning_rate": 9.821913425279514e-05, |
| "loss": 0.0627, |
| "step": 2620 |
| }, |
| { |
| "epoch": 1.8162983425414365, |
| "grad_norm": 0.22441565990447998, |
| "learning_rate": 9.8197200305433e-05, |
| "loss": 0.0487, |
| "step": 2630 |
| }, |
| { |
| "epoch": 1.8232044198895028, |
| "grad_norm": 0.18380586802959442, |
| "learning_rate": 9.817513458876564e-05, |
| "loss": 0.0546, |
| "step": 2640 |
| }, |
| { |
| "epoch": 1.830110497237569, |
| "grad_norm": 0.24778667092323303, |
| "learning_rate": 9.815293716311987e-05, |
| "loss": 0.063, |
| "step": 2650 |
| }, |
| { |
| "epoch": 1.8370165745856353, |
| "grad_norm": 0.22658787667751312, |
| "learning_rate": 9.813060808918262e-05, |
| "loss": 0.0665, |
| "step": 2660 |
| }, |
| { |
| "epoch": 1.8439226519337018, |
| "grad_norm": 0.2324257493019104, |
| "learning_rate": 9.810814742800069e-05, |
| "loss": 0.0576, |
| "step": 2670 |
| }, |
| { |
| "epoch": 1.850828729281768, |
| "grad_norm": 0.3127438724040985, |
| "learning_rate": 9.808555524098074e-05, |
| "loss": 0.0606, |
| "step": 2680 |
| }, |
| { |
| "epoch": 1.8577348066298343, |
| "grad_norm": 0.27944085001945496, |
| "learning_rate": 9.806283158988887e-05, |
| "loss": 0.0538, |
| "step": 2690 |
| }, |
| { |
| "epoch": 1.8646408839779005, |
| "grad_norm": 0.2294415533542633, |
| "learning_rate": 9.803997653685072e-05, |
| "loss": 0.0608, |
| "step": 2700 |
| }, |
| { |
| "epoch": 1.8715469613259668, |
| "grad_norm": 0.280841201543808, |
| "learning_rate": 9.801699014435112e-05, |
| "loss": 0.0614, |
| "step": 2710 |
| }, |
| { |
| "epoch": 1.8784530386740332, |
| "grad_norm": 0.36730295419692993, |
| "learning_rate": 9.799387247523398e-05, |
| "loss": 0.0551, |
| "step": 2720 |
| }, |
| { |
| "epoch": 1.8853591160220995, |
| "grad_norm": 0.27677783370018005, |
| "learning_rate": 9.797062359270215e-05, |
| "loss": 0.0606, |
| "step": 2730 |
| }, |
| { |
| "epoch": 1.8922651933701657, |
| "grad_norm": 0.23716320097446442, |
| "learning_rate": 9.794724356031715e-05, |
| "loss": 0.0591, |
| "step": 2740 |
| }, |
| { |
| "epoch": 1.899171270718232, |
| "grad_norm": 0.22223462164402008, |
| "learning_rate": 9.792373244199913e-05, |
| "loss": 0.052, |
| "step": 2750 |
| }, |
| { |
| "epoch": 1.9060773480662982, |
| "grad_norm": 0.24536921083927155, |
| "learning_rate": 9.790009030202658e-05, |
| "loss": 0.0564, |
| "step": 2760 |
| }, |
| { |
| "epoch": 1.9129834254143647, |
| "grad_norm": 0.25623899698257446, |
| "learning_rate": 9.78763172050362e-05, |
| "loss": 0.0566, |
| "step": 2770 |
| }, |
| { |
| "epoch": 1.919889502762431, |
| "grad_norm": 0.20985721051692963, |
| "learning_rate": 9.785241321602274e-05, |
| "loss": 0.0594, |
| "step": 2780 |
| }, |
| { |
| "epoch": 1.9267955801104972, |
| "grad_norm": 0.22064441442489624, |
| "learning_rate": 9.782837840033879e-05, |
| "loss": 0.0551, |
| "step": 2790 |
| }, |
| { |
| "epoch": 1.9337016574585635, |
| "grad_norm": 0.19597521424293518, |
| "learning_rate": 9.780421282369461e-05, |
| "loss": 0.0541, |
| "step": 2800 |
| }, |
| { |
| "epoch": 1.9406077348066297, |
| "grad_norm": 0.22597678005695343, |
| "learning_rate": 9.777991655215797e-05, |
| "loss": 0.0551, |
| "step": 2810 |
| }, |
| { |
| "epoch": 1.9475138121546962, |
| "grad_norm": 0.2016141563653946, |
| "learning_rate": 9.775548965215394e-05, |
| "loss": 0.047, |
| "step": 2820 |
| }, |
| { |
| "epoch": 1.9544198895027625, |
| "grad_norm": 0.20736120641231537, |
| "learning_rate": 9.773093219046474e-05, |
| "loss": 0.0581, |
| "step": 2830 |
| }, |
| { |
| "epoch": 1.9613259668508287, |
| "grad_norm": 0.2478555291891098, |
| "learning_rate": 9.770624423422954e-05, |
| "loss": 0.0574, |
| "step": 2840 |
| }, |
| { |
| "epoch": 1.9682320441988952, |
| "grad_norm": 0.2794519066810608, |
| "learning_rate": 9.768142585094426e-05, |
| "loss": 0.0526, |
| "step": 2850 |
| }, |
| { |
| "epoch": 1.9751381215469612, |
| "grad_norm": 0.24204091727733612, |
| "learning_rate": 9.765647710846142e-05, |
| "loss": 0.0562, |
| "step": 2860 |
| }, |
| { |
| "epoch": 1.9820441988950277, |
| "grad_norm": 0.2637561857700348, |
| "learning_rate": 9.763139807498991e-05, |
| "loss": 0.0565, |
| "step": 2870 |
| }, |
| { |
| "epoch": 1.988950276243094, |
| "grad_norm": 0.18525560200214386, |
| "learning_rate": 9.760618881909487e-05, |
| "loss": 0.0551, |
| "step": 2880 |
| }, |
| { |
| "epoch": 1.9958563535911602, |
| "grad_norm": 0.18425393104553223, |
| "learning_rate": 9.758084940969744e-05, |
| "loss": 0.0536, |
| "step": 2890 |
| }, |
| { |
| "epoch": 2.0027624309392267, |
| "grad_norm": 0.15110640227794647, |
| "learning_rate": 9.755537991607459e-05, |
| "loss": 0.0589, |
| "step": 2900 |
| }, |
| { |
| "epoch": 2.0096685082872927, |
| "grad_norm": 0.2911590039730072, |
| "learning_rate": 9.752978040785895e-05, |
| "loss": 0.0641, |
| "step": 2910 |
| }, |
| { |
| "epoch": 2.016574585635359, |
| "grad_norm": 0.4290618300437927, |
| "learning_rate": 9.750405095503859e-05, |
| "loss": 0.0574, |
| "step": 2920 |
| }, |
| { |
| "epoch": 2.023480662983425, |
| "grad_norm": 0.13661666214466095, |
| "learning_rate": 9.747819162795686e-05, |
| "loss": 0.0551, |
| "step": 2930 |
| }, |
| { |
| "epoch": 2.0303867403314917, |
| "grad_norm": 0.18142226338386536, |
| "learning_rate": 9.745220249731217e-05, |
| "loss": 0.0515, |
| "step": 2940 |
| }, |
| { |
| "epoch": 2.037292817679558, |
| "grad_norm": 0.2540971040725708, |
| "learning_rate": 9.742608363415781e-05, |
| "loss": 0.0563, |
| "step": 2950 |
| }, |
| { |
| "epoch": 2.044198895027624, |
| "grad_norm": 0.1666320562362671, |
| "learning_rate": 9.739983510990176e-05, |
| "loss": 0.0616, |
| "step": 2960 |
| }, |
| { |
| "epoch": 2.0511049723756907, |
| "grad_norm": 0.20273157954216003, |
| "learning_rate": 9.737345699630647e-05, |
| "loss": 0.0586, |
| "step": 2970 |
| }, |
| { |
| "epoch": 2.0580110497237567, |
| "grad_norm": 0.3235710859298706, |
| "learning_rate": 9.734694936548869e-05, |
| "loss": 0.0554, |
| "step": 2980 |
| }, |
| { |
| "epoch": 2.064917127071823, |
| "grad_norm": 0.23521968722343445, |
| "learning_rate": 9.732031228991932e-05, |
| "loss": 0.0533, |
| "step": 2990 |
| }, |
| { |
| "epoch": 2.0718232044198897, |
| "grad_norm": 0.17394329607486725, |
| "learning_rate": 9.729354584242302e-05, |
| "loss": 0.0567, |
| "step": 3000 |
| }, |
| { |
| "epoch": 2.0787292817679557, |
| "grad_norm": 0.1692623794078827, |
| "learning_rate": 9.726665009617832e-05, |
| "loss": 0.0512, |
| "step": 3010 |
| }, |
| { |
| "epoch": 2.085635359116022, |
| "grad_norm": 0.30398163199424744, |
| "learning_rate": 9.723962512471714e-05, |
| "loss": 0.0581, |
| "step": 3020 |
| }, |
| { |
| "epoch": 2.092541436464088, |
| "grad_norm": 0.3168894648551941, |
| "learning_rate": 9.72124710019247e-05, |
| "loss": 0.0506, |
| "step": 3030 |
| }, |
| { |
| "epoch": 2.0994475138121547, |
| "grad_norm": 0.22003120183944702, |
| "learning_rate": 9.718518780203934e-05, |
| "loss": 0.0504, |
| "step": 3040 |
| }, |
| { |
| "epoch": 2.106353591160221, |
| "grad_norm": 0.21898090839385986, |
| "learning_rate": 9.715777559965228e-05, |
| "loss": 0.0543, |
| "step": 3050 |
| }, |
| { |
| "epoch": 2.113259668508287, |
| "grad_norm": 0.17014583945274353, |
| "learning_rate": 9.713023446970746e-05, |
| "loss": 0.0552, |
| "step": 3060 |
| }, |
| { |
| "epoch": 2.1201657458563536, |
| "grad_norm": 0.2032182514667511, |
| "learning_rate": 9.710256448750126e-05, |
| "loss": 0.0566, |
| "step": 3070 |
| }, |
| { |
| "epoch": 2.12707182320442, |
| "grad_norm": 0.14699594676494598, |
| "learning_rate": 9.707476572868235e-05, |
| "loss": 0.0503, |
| "step": 3080 |
| }, |
| { |
| "epoch": 2.133977900552486, |
| "grad_norm": 0.28991562128067017, |
| "learning_rate": 9.704683826925149e-05, |
| "loss": 0.0569, |
| "step": 3090 |
| }, |
| { |
| "epoch": 2.1408839779005526, |
| "grad_norm": 0.303592324256897, |
| "learning_rate": 9.701878218556129e-05, |
| "loss": 0.0534, |
| "step": 3100 |
| }, |
| { |
| "epoch": 2.1477900552486187, |
| "grad_norm": 0.2398727685213089, |
| "learning_rate": 9.699059755431598e-05, |
| "loss": 0.0602, |
| "step": 3110 |
| }, |
| { |
| "epoch": 2.154696132596685, |
| "grad_norm": 0.19923605024814606, |
| "learning_rate": 9.696228445257132e-05, |
| "loss": 0.0508, |
| "step": 3120 |
| }, |
| { |
| "epoch": 2.1616022099447516, |
| "grad_norm": 0.23763804137706757, |
| "learning_rate": 9.693384295773419e-05, |
| "loss": 0.0558, |
| "step": 3130 |
| }, |
| { |
| "epoch": 2.1685082872928176, |
| "grad_norm": 0.1759253740310669, |
| "learning_rate": 9.690527314756259e-05, |
| "loss": 0.0549, |
| "step": 3140 |
| }, |
| { |
| "epoch": 2.175414364640884, |
| "grad_norm": 0.425621896982193, |
| "learning_rate": 9.687657510016527e-05, |
| "loss": 0.059, |
| "step": 3150 |
| }, |
| { |
| "epoch": 2.18232044198895, |
| "grad_norm": 0.3176600933074951, |
| "learning_rate": 9.684774889400161e-05, |
| "loss": 0.0525, |
| "step": 3160 |
| }, |
| { |
| "epoch": 2.1892265193370166, |
| "grad_norm": 0.2451590746641159, |
| "learning_rate": 9.681879460788135e-05, |
| "loss": 0.0563, |
| "step": 3170 |
| }, |
| { |
| "epoch": 2.196132596685083, |
| "grad_norm": 0.1554330587387085, |
| "learning_rate": 9.67897123209644e-05, |
| "loss": 0.0477, |
| "step": 3180 |
| }, |
| { |
| "epoch": 2.203038674033149, |
| "grad_norm": 0.1610517054796219, |
| "learning_rate": 9.676050211276062e-05, |
| "loss": 0.0485, |
| "step": 3190 |
| }, |
| { |
| "epoch": 2.2099447513812156, |
| "grad_norm": 0.23754076659679413, |
| "learning_rate": 9.673116406312962e-05, |
| "loss": 0.0573, |
| "step": 3200 |
| }, |
| { |
| "epoch": 2.2168508287292816, |
| "grad_norm": 0.24069735407829285, |
| "learning_rate": 9.67016982522805e-05, |
| "loss": 0.0533, |
| "step": 3210 |
| }, |
| { |
| "epoch": 2.223756906077348, |
| "grad_norm": 0.25840380787849426, |
| "learning_rate": 9.667210476077164e-05, |
| "loss": 0.0488, |
| "step": 3220 |
| }, |
| { |
| "epoch": 2.2306629834254146, |
| "grad_norm": 0.24195915460586548, |
| "learning_rate": 9.664238366951055e-05, |
| "loss": 0.051, |
| "step": 3230 |
| }, |
| { |
| "epoch": 2.2375690607734806, |
| "grad_norm": 0.3614242970943451, |
| "learning_rate": 9.661253505975355e-05, |
| "loss": 0.0475, |
| "step": 3240 |
| }, |
| { |
| "epoch": 2.244475138121547, |
| "grad_norm": 0.18738463521003723, |
| "learning_rate": 9.658255901310557e-05, |
| "loss": 0.0496, |
| "step": 3250 |
| }, |
| { |
| "epoch": 2.251381215469613, |
| "grad_norm": 0.18590529263019562, |
| "learning_rate": 9.655245561152e-05, |
| "loss": 0.0555, |
| "step": 3260 |
| }, |
| { |
| "epoch": 2.2582872928176796, |
| "grad_norm": 0.16456319391727448, |
| "learning_rate": 9.65222249372984e-05, |
| "loss": 0.0497, |
| "step": 3270 |
| }, |
| { |
| "epoch": 2.265193370165746, |
| "grad_norm": 0.2985817790031433, |
| "learning_rate": 9.649186707309026e-05, |
| "loss": 0.0483, |
| "step": 3280 |
| }, |
| { |
| "epoch": 2.272099447513812, |
| "grad_norm": 0.3699333667755127, |
| "learning_rate": 9.646138210189283e-05, |
| "loss": 0.0534, |
| "step": 3290 |
| }, |
| { |
| "epoch": 2.2790055248618786, |
| "grad_norm": 0.2080710232257843, |
| "learning_rate": 9.643077010705087e-05, |
| "loss": 0.0464, |
| "step": 3300 |
| }, |
| { |
| "epoch": 2.2859116022099446, |
| "grad_norm": 0.1790868490934372, |
| "learning_rate": 9.640003117225637e-05, |
| "loss": 0.0476, |
| "step": 3310 |
| }, |
| { |
| "epoch": 2.292817679558011, |
| "grad_norm": 0.21831649541854858, |
| "learning_rate": 9.636916538154846e-05, |
| "loss": 0.0504, |
| "step": 3320 |
| }, |
| { |
| "epoch": 2.2997237569060776, |
| "grad_norm": 0.29648539423942566, |
| "learning_rate": 9.633817281931296e-05, |
| "loss": 0.0505, |
| "step": 3330 |
| }, |
| { |
| "epoch": 2.3066298342541436, |
| "grad_norm": 0.2649894952774048, |
| "learning_rate": 9.630705357028242e-05, |
| "loss": 0.0559, |
| "step": 3340 |
| }, |
| { |
| "epoch": 2.31353591160221, |
| "grad_norm": 0.3060481548309326, |
| "learning_rate": 9.627580771953563e-05, |
| "loss": 0.0526, |
| "step": 3350 |
| }, |
| { |
| "epoch": 2.320441988950276, |
| "grad_norm": 0.20820169150829315, |
| "learning_rate": 9.624443535249759e-05, |
| "loss": 0.0494, |
| "step": 3360 |
| }, |
| { |
| "epoch": 2.3273480662983426, |
| "grad_norm": 0.16137447953224182, |
| "learning_rate": 9.621293655493913e-05, |
| "loss": 0.0538, |
| "step": 3370 |
| }, |
| { |
| "epoch": 2.334254143646409, |
| "grad_norm": 0.18672014772891998, |
| "learning_rate": 9.618131141297675e-05, |
| "loss": 0.0523, |
| "step": 3380 |
| }, |
| { |
| "epoch": 2.341160220994475, |
| "grad_norm": 0.2763626277446747, |
| "learning_rate": 9.614956001307242e-05, |
| "loss": 0.0512, |
| "step": 3390 |
| }, |
| { |
| "epoch": 2.3480662983425415, |
| "grad_norm": 0.27899008989334106, |
| "learning_rate": 9.611768244203321e-05, |
| "loss": 0.0558, |
| "step": 3400 |
| }, |
| { |
| "epoch": 2.3549723756906076, |
| "grad_norm": 0.20031829178333282, |
| "learning_rate": 9.60856787870112e-05, |
| "loss": 0.0458, |
| "step": 3410 |
| }, |
| { |
| "epoch": 2.361878453038674, |
| "grad_norm": 0.26932966709136963, |
| "learning_rate": 9.605354913550318e-05, |
| "loss": 0.0564, |
| "step": 3420 |
| }, |
| { |
| "epoch": 2.3687845303867405, |
| "grad_norm": 0.17095929384231567, |
| "learning_rate": 9.602129357535037e-05, |
| "loss": 0.0479, |
| "step": 3430 |
| }, |
| { |
| "epoch": 2.3756906077348066, |
| "grad_norm": 0.18501821160316467, |
| "learning_rate": 9.598891219473825e-05, |
| "loss": 0.0479, |
| "step": 3440 |
| }, |
| { |
| "epoch": 2.382596685082873, |
| "grad_norm": 0.1560228317975998, |
| "learning_rate": 9.595640508219625e-05, |
| "loss": 0.0504, |
| "step": 3450 |
| }, |
| { |
| "epoch": 2.389502762430939, |
| "grad_norm": 0.24531404674053192, |
| "learning_rate": 9.592377232659761e-05, |
| "loss": 0.0566, |
| "step": 3460 |
| }, |
| { |
| "epoch": 2.3964088397790055, |
| "grad_norm": 0.20866815745830536, |
| "learning_rate": 9.589101401715904e-05, |
| "loss": 0.0444, |
| "step": 3470 |
| }, |
| { |
| "epoch": 2.403314917127072, |
| "grad_norm": 0.20980527997016907, |
| "learning_rate": 9.585813024344045e-05, |
| "loss": 0.0482, |
| "step": 3480 |
| }, |
| { |
| "epoch": 2.410220994475138, |
| "grad_norm": 0.20052018761634827, |
| "learning_rate": 9.58251210953449e-05, |
| "loss": 0.0472, |
| "step": 3490 |
| }, |
| { |
| "epoch": 2.4171270718232045, |
| "grad_norm": 0.20939862728118896, |
| "learning_rate": 9.579198666311809e-05, |
| "loss": 0.0556, |
| "step": 3500 |
| }, |
| { |
| "epoch": 2.4240331491712706, |
| "grad_norm": 0.15571282804012299, |
| "learning_rate": 9.575872703734832e-05, |
| "loss": 0.0504, |
| "step": 3510 |
| }, |
| { |
| "epoch": 2.430939226519337, |
| "grad_norm": 0.12131053954362869, |
| "learning_rate": 9.572534230896611e-05, |
| "loss": 0.0456, |
| "step": 3520 |
| }, |
| { |
| "epoch": 2.4378453038674035, |
| "grad_norm": 0.2301919013261795, |
| "learning_rate": 9.569183256924403e-05, |
| "loss": 0.0487, |
| "step": 3530 |
| }, |
| { |
| "epoch": 2.4447513812154695, |
| "grad_norm": 0.24703747034072876, |
| "learning_rate": 9.565819790979646e-05, |
| "loss": 0.051, |
| "step": 3540 |
| }, |
| { |
| "epoch": 2.451657458563536, |
| "grad_norm": 0.23158280551433563, |
| "learning_rate": 9.562443842257925e-05, |
| "loss": 0.051, |
| "step": 3550 |
| }, |
| { |
| "epoch": 2.458563535911602, |
| "grad_norm": 0.25929200649261475, |
| "learning_rate": 9.559055419988956e-05, |
| "loss": 0.0515, |
| "step": 3560 |
| }, |
| { |
| "epoch": 2.4654696132596685, |
| "grad_norm": 0.17875461280345917, |
| "learning_rate": 9.555654533436557e-05, |
| "loss": 0.0486, |
| "step": 3570 |
| }, |
| { |
| "epoch": 2.472375690607735, |
| "grad_norm": 0.17399534583091736, |
| "learning_rate": 9.552241191898621e-05, |
| "loss": 0.0505, |
| "step": 3580 |
| }, |
| { |
| "epoch": 2.479281767955801, |
| "grad_norm": 0.19749684631824493, |
| "learning_rate": 9.548815404707092e-05, |
| "loss": 0.0461, |
| "step": 3590 |
| }, |
| { |
| "epoch": 2.4861878453038675, |
| "grad_norm": 0.24137671291828156, |
| "learning_rate": 9.545377181227942e-05, |
| "loss": 0.05, |
| "step": 3600 |
| }, |
| { |
| "epoch": 2.4930939226519335, |
| "grad_norm": 0.21367807686328888, |
| "learning_rate": 9.541926530861145e-05, |
| "loss": 0.0445, |
| "step": 3610 |
| }, |
| { |
| "epoch": 2.5, |
| "grad_norm": 0.21933801472187042, |
| "learning_rate": 9.538463463040645e-05, |
| "loss": 0.0474, |
| "step": 3620 |
| }, |
| { |
| "epoch": 2.5069060773480665, |
| "grad_norm": 0.14489497244358063, |
| "learning_rate": 9.534987987234337e-05, |
| "loss": 0.0489, |
| "step": 3630 |
| }, |
| { |
| "epoch": 2.5138121546961325, |
| "grad_norm": 0.20172163844108582, |
| "learning_rate": 9.53150011294404e-05, |
| "loss": 0.0501, |
| "step": 3640 |
| }, |
| { |
| "epoch": 2.520718232044199, |
| "grad_norm": 0.24196617305278778, |
| "learning_rate": 9.527999849705471e-05, |
| "loss": 0.0503, |
| "step": 3650 |
| }, |
| { |
| "epoch": 2.527624309392265, |
| "grad_norm": 0.15521635115146637, |
| "learning_rate": 9.524487207088213e-05, |
| "loss": 0.0413, |
| "step": 3660 |
| }, |
| { |
| "epoch": 2.5345303867403315, |
| "grad_norm": 0.17045432329177856, |
| "learning_rate": 9.520962194695698e-05, |
| "loss": 0.0507, |
| "step": 3670 |
| }, |
| { |
| "epoch": 2.541436464088398, |
| "grad_norm": 0.21607710421085358, |
| "learning_rate": 9.517424822165175e-05, |
| "loss": 0.0561, |
| "step": 3680 |
| }, |
| { |
| "epoch": 2.548342541436464, |
| "grad_norm": 0.22375300526618958, |
| "learning_rate": 9.513875099167685e-05, |
| "loss": 0.0475, |
| "step": 3690 |
| }, |
| { |
| "epoch": 2.5552486187845305, |
| "grad_norm": 0.20457430183887482, |
| "learning_rate": 9.510313035408035e-05, |
| "loss": 0.0452, |
| "step": 3700 |
| }, |
| { |
| "epoch": 2.5621546961325965, |
| "grad_norm": 0.16741271317005157, |
| "learning_rate": 9.506738640624775e-05, |
| "loss": 0.0499, |
| "step": 3710 |
| }, |
| { |
| "epoch": 2.569060773480663, |
| "grad_norm": 0.21843063831329346, |
| "learning_rate": 9.50315192459016e-05, |
| "loss": 0.0466, |
| "step": 3720 |
| }, |
| { |
| "epoch": 2.5759668508287294, |
| "grad_norm": 0.2357361763715744, |
| "learning_rate": 9.499552897110136e-05, |
| "loss": 0.0448, |
| "step": 3730 |
| }, |
| { |
| "epoch": 2.5828729281767955, |
| "grad_norm": 0.19706009328365326, |
| "learning_rate": 9.495941568024304e-05, |
| "loss": 0.0473, |
| "step": 3740 |
| }, |
| { |
| "epoch": 2.589779005524862, |
| "grad_norm": 0.1979895979166031, |
| "learning_rate": 9.492317947205904e-05, |
| "loss": 0.0484, |
| "step": 3750 |
| }, |
| { |
| "epoch": 2.596685082872928, |
| "grad_norm": 0.21522966027259827, |
| "learning_rate": 9.488682044561775e-05, |
| "loss": 0.0475, |
| "step": 3760 |
| }, |
| { |
| "epoch": 2.6035911602209945, |
| "grad_norm": 0.21937403082847595, |
| "learning_rate": 9.485033870032335e-05, |
| "loss": 0.049, |
| "step": 3770 |
| }, |
| { |
| "epoch": 2.610497237569061, |
| "grad_norm": 0.2256457507610321, |
| "learning_rate": 9.481373433591556e-05, |
| "loss": 0.049, |
| "step": 3780 |
| }, |
| { |
| "epoch": 2.617403314917127, |
| "grad_norm": 0.17436626553535461, |
| "learning_rate": 9.47770074524693e-05, |
| "loss": 0.0413, |
| "step": 3790 |
| }, |
| { |
| "epoch": 2.6243093922651934, |
| "grad_norm": 0.19035008549690247, |
| "learning_rate": 9.474015815039446e-05, |
| "loss": 0.0464, |
| "step": 3800 |
| }, |
| { |
| "epoch": 2.6312154696132595, |
| "grad_norm": 0.1901327669620514, |
| "learning_rate": 9.470318653043565e-05, |
| "loss": 0.0404, |
| "step": 3810 |
| }, |
| { |
| "epoch": 2.638121546961326, |
| "grad_norm": 0.22293877601623535, |
| "learning_rate": 9.466609269367185e-05, |
| "loss": 0.049, |
| "step": 3820 |
| }, |
| { |
| "epoch": 2.6450276243093924, |
| "grad_norm": 0.15183129906654358, |
| "learning_rate": 9.46288767415162e-05, |
| "loss": 0.0499, |
| "step": 3830 |
| }, |
| { |
| "epoch": 2.6519337016574585, |
| "grad_norm": 0.25658154487609863, |
| "learning_rate": 9.459153877571567e-05, |
| "loss": 0.046, |
| "step": 3840 |
| }, |
| { |
| "epoch": 2.658839779005525, |
| "grad_norm": 0.15512892603874207, |
| "learning_rate": 9.455407889835087e-05, |
| "loss": 0.0425, |
| "step": 3850 |
| }, |
| { |
| "epoch": 2.665745856353591, |
| "grad_norm": 0.28329044580459595, |
| "learning_rate": 9.451649721183564e-05, |
| "loss": 0.0489, |
| "step": 3860 |
| }, |
| { |
| "epoch": 2.6726519337016574, |
| "grad_norm": 0.23139910399913788, |
| "learning_rate": 9.447879381891692e-05, |
| "loss": 0.046, |
| "step": 3870 |
| }, |
| { |
| "epoch": 2.679558011049724, |
| "grad_norm": 0.16557002067565918, |
| "learning_rate": 9.444096882267428e-05, |
| "loss": 0.0493, |
| "step": 3880 |
| }, |
| { |
| "epoch": 2.68646408839779, |
| "grad_norm": 0.21604911983013153, |
| "learning_rate": 9.440302232651988e-05, |
| "loss": 0.0466, |
| "step": 3890 |
| }, |
| { |
| "epoch": 2.6933701657458564, |
| "grad_norm": 0.17157138884067535, |
| "learning_rate": 9.436495443419795e-05, |
| "loss": 0.0507, |
| "step": 3900 |
| }, |
| { |
| "epoch": 2.7002762430939224, |
| "grad_norm": 0.22608821094036102, |
| "learning_rate": 9.432676524978466e-05, |
| "loss": 0.0438, |
| "step": 3910 |
| }, |
| { |
| "epoch": 2.707182320441989, |
| "grad_norm": 0.21017669141292572, |
| "learning_rate": 9.42884548776878e-05, |
| "loss": 0.0474, |
| "step": 3920 |
| }, |
| { |
| "epoch": 2.7140883977900554, |
| "grad_norm": 0.1866975575685501, |
| "learning_rate": 9.425002342264646e-05, |
| "loss": 0.0472, |
| "step": 3930 |
| }, |
| { |
| "epoch": 2.7209944751381214, |
| "grad_norm": 0.24714712798595428, |
| "learning_rate": 9.421147098973077e-05, |
| "loss": 0.0484, |
| "step": 3940 |
| }, |
| { |
| "epoch": 2.727900552486188, |
| "grad_norm": 0.19898390769958496, |
| "learning_rate": 9.41727976843416e-05, |
| "loss": 0.0532, |
| "step": 3950 |
| }, |
| { |
| "epoch": 2.734806629834254, |
| "grad_norm": 0.22621652483940125, |
| "learning_rate": 9.413400361221029e-05, |
| "loss": 0.0465, |
| "step": 3960 |
| }, |
| { |
| "epoch": 2.7417127071823204, |
| "grad_norm": 0.17821535468101501, |
| "learning_rate": 9.409508887939835e-05, |
| "loss": 0.0435, |
| "step": 3970 |
| }, |
| { |
| "epoch": 2.748618784530387, |
| "grad_norm": 0.21015261113643646, |
| "learning_rate": 9.40560535922972e-05, |
| "loss": 0.0441, |
| "step": 3980 |
| }, |
| { |
| "epoch": 2.755524861878453, |
| "grad_norm": 0.1606256365776062, |
| "learning_rate": 9.40168978576278e-05, |
| "loss": 0.0464, |
| "step": 3990 |
| }, |
| { |
| "epoch": 2.7624309392265194, |
| "grad_norm": 0.19328652322292328, |
| "learning_rate": 9.397762178244043e-05, |
| "loss": 0.0486, |
| "step": 4000 |
| }, |
| { |
| "epoch": 2.7693370165745854, |
| "grad_norm": 0.3121219575405121, |
| "learning_rate": 9.393822547411439e-05, |
| "loss": 0.0456, |
| "step": 4010 |
| }, |
| { |
| "epoch": 2.776243093922652, |
| "grad_norm": 0.2536201477050781, |
| "learning_rate": 9.389870904035769e-05, |
| "loss": 0.045, |
| "step": 4020 |
| }, |
| { |
| "epoch": 2.7831491712707184, |
| "grad_norm": 0.24330967664718628, |
| "learning_rate": 9.385907258920672e-05, |
| "loss": 0.0495, |
| "step": 4030 |
| }, |
| { |
| "epoch": 2.7900552486187844, |
| "grad_norm": 0.1849660575389862, |
| "learning_rate": 9.381931622902607e-05, |
| "loss": 0.0489, |
| "step": 4040 |
| }, |
| { |
| "epoch": 2.796961325966851, |
| "grad_norm": 0.19356662034988403, |
| "learning_rate": 9.377944006850807e-05, |
| "loss": 0.0391, |
| "step": 4050 |
| }, |
| { |
| "epoch": 2.803867403314917, |
| "grad_norm": 0.18262770771980286, |
| "learning_rate": 9.373944421667265e-05, |
| "loss": 0.0478, |
| "step": 4060 |
| }, |
| { |
| "epoch": 2.8107734806629834, |
| "grad_norm": 0.16234233975410461, |
| "learning_rate": 9.369932878286691e-05, |
| "loss": 0.0445, |
| "step": 4070 |
| }, |
| { |
| "epoch": 2.81767955801105, |
| "grad_norm": 0.21413561701774597, |
| "learning_rate": 9.365909387676494e-05, |
| "loss": 0.0452, |
| "step": 4080 |
| }, |
| { |
| "epoch": 2.824585635359116, |
| "grad_norm": 0.17325039207935333, |
| "learning_rate": 9.361873960836744e-05, |
| "loss": 0.0451, |
| "step": 4090 |
| }, |
| { |
| "epoch": 2.8314917127071824, |
| "grad_norm": 0.17369811236858368, |
| "learning_rate": 9.357826608800142e-05, |
| "loss": 0.0476, |
| "step": 4100 |
| }, |
| { |
| "epoch": 2.8383977900552484, |
| "grad_norm": 0.1543487012386322, |
| "learning_rate": 9.353767342631994e-05, |
| "loss": 0.0439, |
| "step": 4110 |
| }, |
| { |
| "epoch": 2.845303867403315, |
| "grad_norm": 0.20994071662425995, |
| "learning_rate": 9.34969617343018e-05, |
| "loss": 0.0432, |
| "step": 4120 |
| }, |
| { |
| "epoch": 2.8522099447513813, |
| "grad_norm": 0.23724523186683655, |
| "learning_rate": 9.345613112325122e-05, |
| "loss": 0.0425, |
| "step": 4130 |
| }, |
| { |
| "epoch": 2.8591160220994474, |
| "grad_norm": 0.24331454932689667, |
| "learning_rate": 9.34151817047975e-05, |
| "loss": 0.0427, |
| "step": 4140 |
| }, |
| { |
| "epoch": 2.866022099447514, |
| "grad_norm": 0.19277483224868774, |
| "learning_rate": 9.33741135908948e-05, |
| "loss": 0.0452, |
| "step": 4150 |
| }, |
| { |
| "epoch": 2.87292817679558, |
| "grad_norm": 0.17652346193790436, |
| "learning_rate": 9.33329268938218e-05, |
| "loss": 0.0337, |
| "step": 4160 |
| }, |
| { |
| "epoch": 2.8798342541436464, |
| "grad_norm": 0.1750505417585373, |
| "learning_rate": 9.329162172618132e-05, |
| "loss": 0.0461, |
| "step": 4170 |
| }, |
| { |
| "epoch": 2.886740331491713, |
| "grad_norm": 0.20019155740737915, |
| "learning_rate": 9.325019820090013e-05, |
| "loss": 0.0496, |
| "step": 4180 |
| }, |
| { |
| "epoch": 2.893646408839779, |
| "grad_norm": 0.18941523134708405, |
| "learning_rate": 9.320865643122855e-05, |
| "loss": 0.0461, |
| "step": 4190 |
| }, |
| { |
| "epoch": 2.9005524861878453, |
| "grad_norm": 0.1515769213438034, |
| "learning_rate": 9.316699653074023e-05, |
| "loss": 0.0427, |
| "step": 4200 |
| }, |
| { |
| "epoch": 2.9074585635359114, |
| "grad_norm": 0.1701519638299942, |
| "learning_rate": 9.312521861333172e-05, |
| "loss": 0.0434, |
| "step": 4210 |
| }, |
| { |
| "epoch": 2.914364640883978, |
| "grad_norm": 0.23309242725372314, |
| "learning_rate": 9.308332279322224e-05, |
| "loss": 0.0456, |
| "step": 4220 |
| }, |
| { |
| "epoch": 2.9212707182320443, |
| "grad_norm": 0.2197219729423523, |
| "learning_rate": 9.304130918495338e-05, |
| "loss": 0.0428, |
| "step": 4230 |
| }, |
| { |
| "epoch": 2.9281767955801103, |
| "grad_norm": 0.16014282405376434, |
| "learning_rate": 9.299917790338874e-05, |
| "loss": 0.0436, |
| "step": 4240 |
| }, |
| { |
| "epoch": 2.935082872928177, |
| "grad_norm": 0.18948529660701752, |
| "learning_rate": 9.295692906371363e-05, |
| "loss": 0.0487, |
| "step": 4250 |
| }, |
| { |
| "epoch": 2.941988950276243, |
| "grad_norm": 0.1489516645669937, |
| "learning_rate": 9.291456278143476e-05, |
| "loss": 0.0446, |
| "step": 4260 |
| }, |
| { |
| "epoch": 2.9488950276243093, |
| "grad_norm": 0.1619735211133957, |
| "learning_rate": 9.287207917237994e-05, |
| "loss": 0.0399, |
| "step": 4270 |
| }, |
| { |
| "epoch": 2.955801104972376, |
| "grad_norm": 0.20590822398662567, |
| "learning_rate": 9.282947835269773e-05, |
| "loss": 0.0452, |
| "step": 4280 |
| }, |
| { |
| "epoch": 2.962707182320442, |
| "grad_norm": 0.16909535229206085, |
| "learning_rate": 9.278676043885715e-05, |
| "loss": 0.0384, |
| "step": 4290 |
| }, |
| { |
| "epoch": 2.9696132596685083, |
| "grad_norm": 0.184917613863945, |
| "learning_rate": 9.274392554764733e-05, |
| "loss": 0.0393, |
| "step": 4300 |
| }, |
| { |
| "epoch": 2.9765193370165743, |
| "grad_norm": 0.2012282758951187, |
| "learning_rate": 9.270097379617723e-05, |
| "loss": 0.0419, |
| "step": 4310 |
| }, |
| { |
| "epoch": 2.983425414364641, |
| "grad_norm": 0.188528373837471, |
| "learning_rate": 9.26579053018753e-05, |
| "loss": 0.0462, |
| "step": 4320 |
| }, |
| { |
| "epoch": 2.9903314917127073, |
| "grad_norm": 0.21025685966014862, |
| "learning_rate": 9.261472018248918e-05, |
| "loss": 0.0403, |
| "step": 4330 |
| }, |
| { |
| "epoch": 2.9972375690607733, |
| "grad_norm": 0.2396075278520584, |
| "learning_rate": 9.25714185560853e-05, |
| "loss": 0.0398, |
| "step": 4340 |
| }, |
| { |
| "epoch": 3.00414364640884, |
| "grad_norm": 0.22464925050735474, |
| "learning_rate": 9.252800054104868e-05, |
| "loss": 0.0449, |
| "step": 4350 |
| }, |
| { |
| "epoch": 3.0110497237569063, |
| "grad_norm": 0.1573522537946701, |
| "learning_rate": 9.248446625608252e-05, |
| "loss": 0.0373, |
| "step": 4360 |
| }, |
| { |
| "epoch": 3.0179558011049723, |
| "grad_norm": 0.1664257049560547, |
| "learning_rate": 9.244081582020789e-05, |
| "loss": 0.0428, |
| "step": 4370 |
| }, |
| { |
| "epoch": 3.0248618784530388, |
| "grad_norm": 0.219674214720726, |
| "learning_rate": 9.239704935276339e-05, |
| "loss": 0.0433, |
| "step": 4380 |
| }, |
| { |
| "epoch": 3.031767955801105, |
| "grad_norm": 0.18510626256465912, |
| "learning_rate": 9.235316697340489e-05, |
| "loss": 0.0425, |
| "step": 4390 |
| }, |
| { |
| "epoch": 3.0386740331491713, |
| "grad_norm": 0.14783309400081635, |
| "learning_rate": 9.230916880210512e-05, |
| "loss": 0.0501, |
| "step": 4400 |
| }, |
| { |
| "epoch": 3.0455801104972378, |
| "grad_norm": 0.20491963624954224, |
| "learning_rate": 9.226505495915342e-05, |
| "loss": 0.047, |
| "step": 4410 |
| }, |
| { |
| "epoch": 3.052486187845304, |
| "grad_norm": 0.19762618839740753, |
| "learning_rate": 9.222082556515536e-05, |
| "loss": 0.0408, |
| "step": 4420 |
| }, |
| { |
| "epoch": 3.0593922651933703, |
| "grad_norm": 0.1874648630619049, |
| "learning_rate": 9.217648074103242e-05, |
| "loss": 0.0438, |
| "step": 4430 |
| }, |
| { |
| "epoch": 3.0662983425414363, |
| "grad_norm": 0.15683989226818085, |
| "learning_rate": 9.213202060802161e-05, |
| "loss": 0.0471, |
| "step": 4440 |
| }, |
| { |
| "epoch": 3.0732044198895028, |
| "grad_norm": 0.19267770648002625, |
| "learning_rate": 9.208744528767528e-05, |
| "loss": 0.0424, |
| "step": 4450 |
| }, |
| { |
| "epoch": 3.0801104972375692, |
| "grad_norm": 0.20659422874450684, |
| "learning_rate": 9.204275490186064e-05, |
| "loss": 0.0431, |
| "step": 4460 |
| }, |
| { |
| "epoch": 3.0870165745856353, |
| "grad_norm": 0.1620733141899109, |
| "learning_rate": 9.199794957275949e-05, |
| "loss": 0.04, |
| "step": 4470 |
| }, |
| { |
| "epoch": 3.0939226519337018, |
| "grad_norm": 0.21852877736091614, |
| "learning_rate": 9.19530294228679e-05, |
| "loss": 0.0434, |
| "step": 4480 |
| }, |
| { |
| "epoch": 3.100828729281768, |
| "grad_norm": 0.18497471511363983, |
| "learning_rate": 9.190799457499583e-05, |
| "loss": 0.0427, |
| "step": 4490 |
| }, |
| { |
| "epoch": 3.1077348066298343, |
| "grad_norm": 0.20119594037532806, |
| "learning_rate": 9.186284515226686e-05, |
| "loss": 0.0445, |
| "step": 4500 |
| }, |
| { |
| "epoch": 3.1146408839779007, |
| "grad_norm": 0.16487465798854828, |
| "learning_rate": 9.181758127811777e-05, |
| "loss": 0.0463, |
| "step": 4510 |
| }, |
| { |
| "epoch": 3.1215469613259668, |
| "grad_norm": 0.18515750765800476, |
| "learning_rate": 9.177220307629825e-05, |
| "loss": 0.0434, |
| "step": 4520 |
| }, |
| { |
| "epoch": 3.1284530386740332, |
| "grad_norm": 0.15298950672149658, |
| "learning_rate": 9.172671067087059e-05, |
| "loss": 0.0426, |
| "step": 4530 |
| }, |
| { |
| "epoch": 3.1353591160220993, |
| "grad_norm": 0.2684433162212372, |
| "learning_rate": 9.16811041862093e-05, |
| "loss": 0.0368, |
| "step": 4540 |
| }, |
| { |
| "epoch": 3.1422651933701657, |
| "grad_norm": 0.16758117079734802, |
| "learning_rate": 9.163538374700076e-05, |
| "loss": 0.0406, |
| "step": 4550 |
| }, |
| { |
| "epoch": 3.149171270718232, |
| "grad_norm": 0.16840502619743347, |
| "learning_rate": 9.158954947824287e-05, |
| "loss": 0.0366, |
| "step": 4560 |
| }, |
| { |
| "epoch": 3.1560773480662982, |
| "grad_norm": 0.11108331382274628, |
| "learning_rate": 9.154360150524482e-05, |
| "loss": 0.0367, |
| "step": 4570 |
| }, |
| { |
| "epoch": 3.1629834254143647, |
| "grad_norm": 0.189457967877388, |
| "learning_rate": 9.14975399536266e-05, |
| "loss": 0.0422, |
| "step": 4580 |
| }, |
| { |
| "epoch": 3.1698895027624308, |
| "grad_norm": 0.19346898794174194, |
| "learning_rate": 9.14513649493187e-05, |
| "loss": 0.0437, |
| "step": 4590 |
| }, |
| { |
| "epoch": 3.1767955801104972, |
| "grad_norm": 0.3062164783477783, |
| "learning_rate": 9.140507661856187e-05, |
| "loss": 0.0427, |
| "step": 4600 |
| }, |
| { |
| "epoch": 3.1837016574585637, |
| "grad_norm": 0.18615876138210297, |
| "learning_rate": 9.135867508790661e-05, |
| "loss": 0.0413, |
| "step": 4610 |
| }, |
| { |
| "epoch": 3.1906077348066297, |
| "grad_norm": 0.18920812010765076, |
| "learning_rate": 9.131216048421291e-05, |
| "loss": 0.0466, |
| "step": 4620 |
| }, |
| { |
| "epoch": 3.197513812154696, |
| "grad_norm": 0.1465039700269699, |
| "learning_rate": 9.126553293464998e-05, |
| "loss": 0.0442, |
| "step": 4630 |
| }, |
| { |
| "epoch": 3.2044198895027622, |
| "grad_norm": 0.2439999282360077, |
| "learning_rate": 9.121879256669572e-05, |
| "loss": 0.0458, |
| "step": 4640 |
| }, |
| { |
| "epoch": 3.2113259668508287, |
| "grad_norm": 0.40056276321411133, |
| "learning_rate": 9.117193950813652e-05, |
| "loss": 0.0454, |
| "step": 4650 |
| }, |
| { |
| "epoch": 3.218232044198895, |
| "grad_norm": 0.18807795643806458, |
| "learning_rate": 9.112497388706685e-05, |
| "loss": 0.0508, |
| "step": 4660 |
| }, |
| { |
| "epoch": 3.2251381215469612, |
| "grad_norm": 0.18762271106243134, |
| "learning_rate": 9.10778958318889e-05, |
| "loss": 0.0425, |
| "step": 4670 |
| }, |
| { |
| "epoch": 3.2320441988950277, |
| "grad_norm": 0.19282494485378265, |
| "learning_rate": 9.103070547131232e-05, |
| "loss": 0.0353, |
| "step": 4680 |
| }, |
| { |
| "epoch": 3.2389502762430937, |
| "grad_norm": 0.2008621245622635, |
| "learning_rate": 9.098340293435375e-05, |
| "loss": 0.0406, |
| "step": 4690 |
| }, |
| { |
| "epoch": 3.24585635359116, |
| "grad_norm": 0.1510397046804428, |
| "learning_rate": 9.093598835033649e-05, |
| "loss": 0.0439, |
| "step": 4700 |
| }, |
| { |
| "epoch": 3.2527624309392267, |
| "grad_norm": 0.22977766394615173, |
| "learning_rate": 9.088846184889021e-05, |
| "loss": 0.0414, |
| "step": 4710 |
| }, |
| { |
| "epoch": 3.2596685082872927, |
| "grad_norm": 0.17466506361961365, |
| "learning_rate": 9.084082355995057e-05, |
| "loss": 0.0443, |
| "step": 4720 |
| }, |
| { |
| "epoch": 3.266574585635359, |
| "grad_norm": 0.17879237234592438, |
| "learning_rate": 9.079307361375882e-05, |
| "loss": 0.0425, |
| "step": 4730 |
| }, |
| { |
| "epoch": 3.2734806629834257, |
| "grad_norm": 0.20717912912368774, |
| "learning_rate": 9.074521214086149e-05, |
| "loss": 0.0373, |
| "step": 4740 |
| }, |
| { |
| "epoch": 3.2803867403314917, |
| "grad_norm": 0.19752268493175507, |
| "learning_rate": 9.069723927211001e-05, |
| "loss": 0.0391, |
| "step": 4750 |
| }, |
| { |
| "epoch": 3.287292817679558, |
| "grad_norm": 0.19491535425186157, |
| "learning_rate": 9.064915513866037e-05, |
| "loss": 0.0434, |
| "step": 4760 |
| }, |
| { |
| "epoch": 3.294198895027624, |
| "grad_norm": 0.21187685430049896, |
| "learning_rate": 9.060095987197279e-05, |
| "loss": 0.0402, |
| "step": 4770 |
| }, |
| { |
| "epoch": 3.3011049723756907, |
| "grad_norm": 0.15764431655406952, |
| "learning_rate": 9.055265360381126e-05, |
| "loss": 0.0423, |
| "step": 4780 |
| }, |
| { |
| "epoch": 3.308011049723757, |
| "grad_norm": 0.15811102092266083, |
| "learning_rate": 9.050423646624326e-05, |
| "loss": 0.0385, |
| "step": 4790 |
| }, |
| { |
| "epoch": 3.314917127071823, |
| "grad_norm": 0.16640017926692963, |
| "learning_rate": 9.045570859163943e-05, |
| "loss": 0.0427, |
| "step": 4800 |
| }, |
| { |
| "epoch": 3.3218232044198897, |
| "grad_norm": 0.15784433484077454, |
| "learning_rate": 9.04070701126731e-05, |
| "loss": 0.0415, |
| "step": 4810 |
| }, |
| { |
| "epoch": 3.3287292817679557, |
| "grad_norm": 0.1673387885093689, |
| "learning_rate": 9.035832116232001e-05, |
| "loss": 0.0386, |
| "step": 4820 |
| }, |
| { |
| "epoch": 3.335635359116022, |
| "grad_norm": 0.2144685834646225, |
| "learning_rate": 9.030946187385796e-05, |
| "loss": 0.0497, |
| "step": 4830 |
| }, |
| { |
| "epoch": 3.3425414364640886, |
| "grad_norm": 0.23754587769508362, |
| "learning_rate": 9.026049238086635e-05, |
| "loss": 0.048, |
| "step": 4840 |
| }, |
| { |
| "epoch": 3.3494475138121547, |
| "grad_norm": 0.1485087275505066, |
| "learning_rate": 9.021141281722591e-05, |
| "loss": 0.0363, |
| "step": 4850 |
| }, |
| { |
| "epoch": 3.356353591160221, |
| "grad_norm": 0.19838620722293854, |
| "learning_rate": 9.01622233171183e-05, |
| "loss": 0.0398, |
| "step": 4860 |
| }, |
| { |
| "epoch": 3.363259668508287, |
| "grad_norm": 0.22358939051628113, |
| "learning_rate": 9.011292401502574e-05, |
| "loss": 0.044, |
| "step": 4870 |
| }, |
| { |
| "epoch": 3.3701657458563536, |
| "grad_norm": 0.21143372356891632, |
| "learning_rate": 9.006351504573063e-05, |
| "loss": 0.042, |
| "step": 4880 |
| }, |
| { |
| "epoch": 3.37707182320442, |
| "grad_norm": 0.19049137830734253, |
| "learning_rate": 9.001399654431519e-05, |
| "loss": 0.0403, |
| "step": 4890 |
| }, |
| { |
| "epoch": 3.383977900552486, |
| "grad_norm": 0.20016619563102722, |
| "learning_rate": 8.996436864616116e-05, |
| "loss": 0.042, |
| "step": 4900 |
| }, |
| { |
| "epoch": 3.3908839779005526, |
| "grad_norm": 0.16115543246269226, |
| "learning_rate": 8.991463148694925e-05, |
| "loss": 0.0387, |
| "step": 4910 |
| }, |
| { |
| "epoch": 3.3977900552486187, |
| "grad_norm": 0.18714101612567902, |
| "learning_rate": 8.986478520265902e-05, |
| "loss": 0.0357, |
| "step": 4920 |
| }, |
| { |
| "epoch": 3.404696132596685, |
| "grad_norm": 0.23854629695415497, |
| "learning_rate": 8.981482992956827e-05, |
| "loss": 0.0414, |
| "step": 4930 |
| }, |
| { |
| "epoch": 3.4116022099447516, |
| "grad_norm": 0.18258176743984222, |
| "learning_rate": 8.976476580425282e-05, |
| "loss": 0.0406, |
| "step": 4940 |
| }, |
| { |
| "epoch": 3.4185082872928176, |
| "grad_norm": 0.1593986600637436, |
| "learning_rate": 8.971459296358606e-05, |
| "loss": 0.0413, |
| "step": 4950 |
| }, |
| { |
| "epoch": 3.425414364640884, |
| "grad_norm": 0.2746127247810364, |
| "learning_rate": 8.966431154473864e-05, |
| "loss": 0.0399, |
| "step": 4960 |
| }, |
| { |
| "epoch": 3.43232044198895, |
| "grad_norm": 0.23390236496925354, |
| "learning_rate": 8.961392168517803e-05, |
| "loss": 0.0413, |
| "step": 4970 |
| }, |
| { |
| "epoch": 3.4392265193370166, |
| "grad_norm": 0.1735554188489914, |
| "learning_rate": 8.956342352266821e-05, |
| "loss": 0.0452, |
| "step": 4980 |
| }, |
| { |
| "epoch": 3.446132596685083, |
| "grad_norm": 0.15287978947162628, |
| "learning_rate": 8.95128171952692e-05, |
| "loss": 0.0384, |
| "step": 4990 |
| }, |
| { |
| "epoch": 3.453038674033149, |
| "grad_norm": 0.16149814426898956, |
| "learning_rate": 8.946210284133676e-05, |
| "loss": 0.0414, |
| "step": 5000 |
| }, |
| { |
| "epoch": 3.4599447513812156, |
| "grad_norm": 0.1760193556547165, |
| "learning_rate": 8.941128059952201e-05, |
| "loss": 0.0426, |
| "step": 5010 |
| }, |
| { |
| "epoch": 3.4668508287292816, |
| "grad_norm": 0.21478940546512604, |
| "learning_rate": 8.936035060877102e-05, |
| "loss": 0.041, |
| "step": 5020 |
| }, |
| { |
| "epoch": 3.473756906077348, |
| "grad_norm": 0.20234301686286926, |
| "learning_rate": 8.930931300832443e-05, |
| "loss": 0.0413, |
| "step": 5030 |
| }, |
| { |
| "epoch": 3.4806629834254146, |
| "grad_norm": 0.1900159865617752, |
| "learning_rate": 8.925816793771711e-05, |
| "loss": 0.0405, |
| "step": 5040 |
| }, |
| { |
| "epoch": 3.4875690607734806, |
| "grad_norm": 0.15434470772743225, |
| "learning_rate": 8.92069155367777e-05, |
| "loss": 0.0383, |
| "step": 5050 |
| }, |
| { |
| "epoch": 3.494475138121547, |
| "grad_norm": 0.14990466833114624, |
| "learning_rate": 8.915555594562834e-05, |
| "loss": 0.04, |
| "step": 5060 |
| }, |
| { |
| "epoch": 3.501381215469613, |
| "grad_norm": 0.14787523448467255, |
| "learning_rate": 8.910408930468416e-05, |
| "loss": 0.0332, |
| "step": 5070 |
| }, |
| { |
| "epoch": 3.5082872928176796, |
| "grad_norm": 0.17466671764850616, |
| "learning_rate": 8.905251575465303e-05, |
| "loss": 0.0406, |
| "step": 5080 |
| }, |
| { |
| "epoch": 3.515193370165746, |
| "grad_norm": 0.20608386397361755, |
| "learning_rate": 8.900083543653502e-05, |
| "loss": 0.0432, |
| "step": 5090 |
| }, |
| { |
| "epoch": 3.522099447513812, |
| "grad_norm": 0.15128932893276215, |
| "learning_rate": 8.894904849162218e-05, |
| "loss": 0.0392, |
| "step": 5100 |
| }, |
| { |
| "epoch": 3.5290055248618786, |
| "grad_norm": 0.21203255653381348, |
| "learning_rate": 8.889715506149802e-05, |
| "loss": 0.0376, |
| "step": 5110 |
| }, |
| { |
| "epoch": 3.5359116022099446, |
| "grad_norm": 0.18824951350688934, |
| "learning_rate": 8.884515528803722e-05, |
| "loss": 0.0444, |
| "step": 5120 |
| }, |
| { |
| "epoch": 3.542817679558011, |
| "grad_norm": 0.18114745616912842, |
| "learning_rate": 8.879304931340517e-05, |
| "loss": 0.0354, |
| "step": 5130 |
| }, |
| { |
| "epoch": 3.5497237569060776, |
| "grad_norm": 0.16870130598545074, |
| "learning_rate": 8.874083728005759e-05, |
| "loss": 0.036, |
| "step": 5140 |
| }, |
| { |
| "epoch": 3.5566298342541436, |
| "grad_norm": 0.17004938423633575, |
| "learning_rate": 8.868851933074021e-05, |
| "loss": 0.0384, |
| "step": 5150 |
| }, |
| { |
| "epoch": 3.56353591160221, |
| "grad_norm": 0.21150527894496918, |
| "learning_rate": 8.863609560848829e-05, |
| "loss": 0.0415, |
| "step": 5160 |
| }, |
| { |
| "epoch": 3.570441988950276, |
| "grad_norm": 0.2698124945163727, |
| "learning_rate": 8.85835662566263e-05, |
| "loss": 0.0314, |
| "step": 5170 |
| }, |
| { |
| "epoch": 3.5773480662983426, |
| "grad_norm": 0.2014392763376236, |
| "learning_rate": 8.853093141876747e-05, |
| "loss": 0.0394, |
| "step": 5180 |
| }, |
| { |
| "epoch": 3.584254143646409, |
| "grad_norm": 0.18041956424713135, |
| "learning_rate": 8.847819123881343e-05, |
| "loss": 0.0424, |
| "step": 5190 |
| }, |
| { |
| "epoch": 3.591160220994475, |
| "grad_norm": 0.15482786297798157, |
| "learning_rate": 8.842534586095383e-05, |
| "loss": 0.0444, |
| "step": 5200 |
| }, |
| { |
| "epoch": 3.5980662983425415, |
| "grad_norm": 0.1852821409702301, |
| "learning_rate": 8.837239542966593e-05, |
| "loss": 0.0359, |
| "step": 5210 |
| }, |
| { |
| "epoch": 3.6049723756906076, |
| "grad_norm": 0.21371380984783173, |
| "learning_rate": 8.831934008971417e-05, |
| "loss": 0.0416, |
| "step": 5220 |
| }, |
| { |
| "epoch": 3.611878453038674, |
| "grad_norm": 0.11043145507574081, |
| "learning_rate": 8.826617998614982e-05, |
| "loss": 0.0354, |
| "step": 5230 |
| }, |
| { |
| "epoch": 3.6187845303867405, |
| "grad_norm": 0.15232881903648376, |
| "learning_rate": 8.821291526431056e-05, |
| "loss": 0.0354, |
| "step": 5240 |
| }, |
| { |
| "epoch": 3.6256906077348066, |
| "grad_norm": 0.14688993990421295, |
| "learning_rate": 8.815954606982015e-05, |
| "loss": 0.034, |
| "step": 5250 |
| }, |
| { |
| "epoch": 3.632596685082873, |
| "grad_norm": 0.1413627713918686, |
| "learning_rate": 8.810607254858789e-05, |
| "loss": 0.0399, |
| "step": 5260 |
| }, |
| { |
| "epoch": 3.639502762430939, |
| "grad_norm": 0.18085753917694092, |
| "learning_rate": 8.805249484680838e-05, |
| "loss": 0.0412, |
| "step": 5270 |
| }, |
| { |
| "epoch": 3.6464088397790055, |
| "grad_norm": 0.17890624701976776, |
| "learning_rate": 8.799881311096096e-05, |
| "loss": 0.0399, |
| "step": 5280 |
| }, |
| { |
| "epoch": 3.653314917127072, |
| "grad_norm": 0.1451053023338318, |
| "learning_rate": 8.794502748780949e-05, |
| "loss": 0.0406, |
| "step": 5290 |
| }, |
| { |
| "epoch": 3.660220994475138, |
| "grad_norm": 0.14209549129009247, |
| "learning_rate": 8.78911381244018e-05, |
| "loss": 0.0376, |
| "step": 5300 |
| }, |
| { |
| "epoch": 3.6671270718232045, |
| "grad_norm": 0.17420484125614166, |
| "learning_rate": 8.783714516806933e-05, |
| "loss": 0.0361, |
| "step": 5310 |
| }, |
| { |
| "epoch": 3.6740331491712706, |
| "grad_norm": 0.20409078896045685, |
| "learning_rate": 8.77830487664268e-05, |
| "loss": 0.0359, |
| "step": 5320 |
| }, |
| { |
| "epoch": 3.680939226519337, |
| "grad_norm": 0.13683636486530304, |
| "learning_rate": 8.772884906737167e-05, |
| "loss": 0.0395, |
| "step": 5330 |
| }, |
| { |
| "epoch": 3.6878453038674035, |
| "grad_norm": 0.16172552108764648, |
| "learning_rate": 8.767454621908387e-05, |
| "loss": 0.0359, |
| "step": 5340 |
| }, |
| { |
| "epoch": 3.6947513812154695, |
| "grad_norm": 0.1768387109041214, |
| "learning_rate": 8.76201403700253e-05, |
| "loss": 0.036, |
| "step": 5350 |
| }, |
| { |
| "epoch": 3.701657458563536, |
| "grad_norm": 0.22492876648902893, |
| "learning_rate": 8.756563166893949e-05, |
| "loss": 0.0379, |
| "step": 5360 |
| }, |
| { |
| "epoch": 3.708563535911602, |
| "grad_norm": 0.16830484569072723, |
| "learning_rate": 8.751102026485113e-05, |
| "loss": 0.0436, |
| "step": 5370 |
| }, |
| { |
| "epoch": 3.7154696132596685, |
| "grad_norm": 0.17052343487739563, |
| "learning_rate": 8.745630630706571e-05, |
| "loss": 0.0385, |
| "step": 5380 |
| }, |
| { |
| "epoch": 3.722375690607735, |
| "grad_norm": 0.22967390716075897, |
| "learning_rate": 8.740148994516912e-05, |
| "loss": 0.0383, |
| "step": 5390 |
| }, |
| { |
| "epoch": 3.729281767955801, |
| "grad_norm": 0.2420748621225357, |
| "learning_rate": 8.73465713290272e-05, |
| "loss": 0.0377, |
| "step": 5400 |
| }, |
| { |
| "epoch": 3.7361878453038675, |
| "grad_norm": 0.20598067343235016, |
| "learning_rate": 8.729155060878533e-05, |
| "loss": 0.0394, |
| "step": 5410 |
| }, |
| { |
| "epoch": 3.7430939226519335, |
| "grad_norm": 0.17275604605674744, |
| "learning_rate": 8.723642793486809e-05, |
| "loss": 0.0336, |
| "step": 5420 |
| }, |
| { |
| "epoch": 3.75, |
| "grad_norm": 0.26493966579437256, |
| "learning_rate": 8.718120345797873e-05, |
| "loss": 0.0392, |
| "step": 5430 |
| }, |
| { |
| "epoch": 3.7569060773480665, |
| "grad_norm": 0.21323080360889435, |
| "learning_rate": 8.712587732909889e-05, |
| "loss": 0.0373, |
| "step": 5440 |
| }, |
| { |
| "epoch": 3.7638121546961325, |
| "grad_norm": 0.2604227364063263, |
| "learning_rate": 8.707044969948806e-05, |
| "loss": 0.0406, |
| "step": 5450 |
| }, |
| { |
| "epoch": 3.770718232044199, |
| "grad_norm": 0.14074908196926117, |
| "learning_rate": 8.701492072068329e-05, |
| "loss": 0.0366, |
| "step": 5460 |
| }, |
| { |
| "epoch": 3.777624309392265, |
| "grad_norm": 0.15715032815933228, |
| "learning_rate": 8.695929054449869e-05, |
| "loss": 0.0371, |
| "step": 5470 |
| }, |
| { |
| "epoch": 3.7845303867403315, |
| "grad_norm": 0.1916290670633316, |
| "learning_rate": 8.690355932302501e-05, |
| "loss": 0.0323, |
| "step": 5480 |
| }, |
| { |
| "epoch": 3.791436464088398, |
| "grad_norm": 0.22312071919441223, |
| "learning_rate": 8.684772720862931e-05, |
| "loss": 0.0412, |
| "step": 5490 |
| }, |
| { |
| "epoch": 3.798342541436464, |
| "grad_norm": 0.19781441986560822, |
| "learning_rate": 8.679179435395446e-05, |
| "loss": 0.0358, |
| "step": 5500 |
| }, |
| { |
| "epoch": 3.8052486187845305, |
| "grad_norm": 0.1877889633178711, |
| "learning_rate": 8.673576091191874e-05, |
| "loss": 0.0359, |
| "step": 5510 |
| }, |
| { |
| "epoch": 3.8121546961325965, |
| "grad_norm": 0.17329281568527222, |
| "learning_rate": 8.667962703571541e-05, |
| "loss": 0.0331, |
| "step": 5520 |
| }, |
| { |
| "epoch": 3.819060773480663, |
| "grad_norm": 0.23658014833927155, |
| "learning_rate": 8.662339287881238e-05, |
| "loss": 0.0427, |
| "step": 5530 |
| }, |
| { |
| "epoch": 3.8259668508287294, |
| "grad_norm": 0.20422625541687012, |
| "learning_rate": 8.656705859495169e-05, |
| "loss": 0.041, |
| "step": 5540 |
| }, |
| { |
| "epoch": 3.8328729281767955, |
| "grad_norm": 0.17714661359786987, |
| "learning_rate": 8.651062433814912e-05, |
| "loss": 0.0318, |
| "step": 5550 |
| }, |
| { |
| "epoch": 3.839779005524862, |
| "grad_norm": 0.18044039607048035, |
| "learning_rate": 8.645409026269375e-05, |
| "loss": 0.0389, |
| "step": 5560 |
| }, |
| { |
| "epoch": 3.846685082872928, |
| "grad_norm": 0.14779943227767944, |
| "learning_rate": 8.639745652314759e-05, |
| "loss": 0.0314, |
| "step": 5570 |
| }, |
| { |
| "epoch": 3.8535911602209945, |
| "grad_norm": 0.16303597390651703, |
| "learning_rate": 8.634072327434515e-05, |
| "loss": 0.037, |
| "step": 5580 |
| }, |
| { |
| "epoch": 3.860497237569061, |
| "grad_norm": 0.17153339087963104, |
| "learning_rate": 8.628389067139294e-05, |
| "loss": 0.0386, |
| "step": 5590 |
| }, |
| { |
| "epoch": 3.867403314917127, |
| "grad_norm": 0.2033110409975052, |
| "learning_rate": 8.622695886966911e-05, |
| "loss": 0.0354, |
| "step": 5600 |
| }, |
| { |
| "epoch": 3.8743093922651934, |
| "grad_norm": 0.16191303730010986, |
| "learning_rate": 8.616992802482308e-05, |
| "loss": 0.0403, |
| "step": 5610 |
| }, |
| { |
| "epoch": 3.8812154696132595, |
| "grad_norm": 0.1545790135860443, |
| "learning_rate": 8.611279829277496e-05, |
| "loss": 0.038, |
| "step": 5620 |
| }, |
| { |
| "epoch": 3.888121546961326, |
| "grad_norm": 0.18758025765419006, |
| "learning_rate": 8.605556982971528e-05, |
| "loss": 0.0355, |
| "step": 5630 |
| }, |
| { |
| "epoch": 3.8950276243093924, |
| "grad_norm": 0.16228602826595306, |
| "learning_rate": 8.599824279210447e-05, |
| "loss": 0.0389, |
| "step": 5640 |
| }, |
| { |
| "epoch": 3.9019337016574585, |
| "grad_norm": 0.1846267133951187, |
| "learning_rate": 8.594081733667243e-05, |
| "loss": 0.0343, |
| "step": 5650 |
| }, |
| { |
| "epoch": 3.908839779005525, |
| "grad_norm": 0.22637325525283813, |
| "learning_rate": 8.58832936204182e-05, |
| "loss": 0.0358, |
| "step": 5660 |
| }, |
| { |
| "epoch": 3.915745856353591, |
| "grad_norm": 0.15883108973503113, |
| "learning_rate": 8.582567180060942e-05, |
| "loss": 0.037, |
| "step": 5670 |
| }, |
| { |
| "epoch": 3.9226519337016574, |
| "grad_norm": 0.20179514586925507, |
| "learning_rate": 8.576795203478194e-05, |
| "loss": 0.0367, |
| "step": 5680 |
| }, |
| { |
| "epoch": 3.929558011049724, |
| "grad_norm": 0.226895272731781, |
| "learning_rate": 8.571013448073939e-05, |
| "loss": 0.0422, |
| "step": 5690 |
| }, |
| { |
| "epoch": 3.93646408839779, |
| "grad_norm": 0.1747041642665863, |
| "learning_rate": 8.565221929655275e-05, |
| "loss": 0.0324, |
| "step": 5700 |
| }, |
| { |
| "epoch": 3.9433701657458564, |
| "grad_norm": 0.22531364858150482, |
| "learning_rate": 8.559420664055992e-05, |
| "loss": 0.0398, |
| "step": 5710 |
| }, |
| { |
| "epoch": 3.9502762430939224, |
| "grad_norm": 0.2313089370727539, |
| "learning_rate": 8.553609667136532e-05, |
| "loss": 0.0418, |
| "step": 5720 |
| }, |
| { |
| "epoch": 3.957182320441989, |
| "grad_norm": 0.2609806954860687, |
| "learning_rate": 8.547788954783936e-05, |
| "loss": 0.0445, |
| "step": 5730 |
| }, |
| { |
| "epoch": 3.9640883977900554, |
| "grad_norm": 0.18635094165802002, |
| "learning_rate": 8.541958542911808e-05, |
| "loss": 0.0337, |
| "step": 5740 |
| }, |
| { |
| "epoch": 3.9709944751381214, |
| "grad_norm": 0.20794489979743958, |
| "learning_rate": 8.536118447460275e-05, |
| "loss": 0.0365, |
| "step": 5750 |
| }, |
| { |
| "epoch": 3.977900552486188, |
| "grad_norm": 0.19042007625102997, |
| "learning_rate": 8.530268684395932e-05, |
| "loss": 0.0357, |
| "step": 5760 |
| }, |
| { |
| "epoch": 3.984806629834254, |
| "grad_norm": 0.30799993872642517, |
| "learning_rate": 8.524409269711807e-05, |
| "loss": 0.0402, |
| "step": 5770 |
| }, |
| { |
| "epoch": 3.9917127071823204, |
| "grad_norm": 0.19904528558254242, |
| "learning_rate": 8.51854021942732e-05, |
| "loss": 0.0394, |
| "step": 5780 |
| }, |
| { |
| "epoch": 3.998618784530387, |
| "grad_norm": 0.16762062907218933, |
| "learning_rate": 8.512661549588227e-05, |
| "loss": 0.038, |
| "step": 5790 |
| }, |
| { |
| "epoch": 4.005524861878453, |
| "grad_norm": 0.1410869061946869, |
| "learning_rate": 8.506773276266588e-05, |
| "loss": 0.0316, |
| "step": 5800 |
| }, |
| { |
| "epoch": 4.012430939226519, |
| "grad_norm": 0.17487668991088867, |
| "learning_rate": 8.500875415560721e-05, |
| "loss": 0.034, |
| "step": 5810 |
| }, |
| { |
| "epoch": 4.019337016574585, |
| "grad_norm": 0.15733812749385834, |
| "learning_rate": 8.494967983595144e-05, |
| "loss": 0.0327, |
| "step": 5820 |
| }, |
| { |
| "epoch": 4.026243093922652, |
| "grad_norm": 0.15330734848976135, |
| "learning_rate": 8.489050996520558e-05, |
| "loss": 0.038, |
| "step": 5830 |
| }, |
| { |
| "epoch": 4.033149171270718, |
| "grad_norm": 0.10522384196519852, |
| "learning_rate": 8.483124470513775e-05, |
| "loss": 0.0361, |
| "step": 5840 |
| }, |
| { |
| "epoch": 4.040055248618785, |
| "grad_norm": 0.18659597635269165, |
| "learning_rate": 8.477188421777692e-05, |
| "loss": 0.0398, |
| "step": 5850 |
| }, |
| { |
| "epoch": 4.04696132596685, |
| "grad_norm": 0.13936083018779755, |
| "learning_rate": 8.47124286654124e-05, |
| "loss": 0.0382, |
| "step": 5860 |
| }, |
| { |
| "epoch": 4.053867403314917, |
| "grad_norm": 0.1510729044675827, |
| "learning_rate": 8.465287821059341e-05, |
| "loss": 0.0345, |
| "step": 5870 |
| }, |
| { |
| "epoch": 4.060773480662983, |
| "grad_norm": 0.18497096002101898, |
| "learning_rate": 8.45932330161286e-05, |
| "loss": 0.0408, |
| "step": 5880 |
| }, |
| { |
| "epoch": 4.06767955801105, |
| "grad_norm": 0.1754857897758484, |
| "learning_rate": 8.453349324508567e-05, |
| "loss": 0.0367, |
| "step": 5890 |
| }, |
| { |
| "epoch": 4.074585635359116, |
| "grad_norm": 0.18897674977779388, |
| "learning_rate": 8.447365906079088e-05, |
| "loss": 0.0379, |
| "step": 5900 |
| }, |
| { |
| "epoch": 4.081491712707182, |
| "grad_norm": 0.1608031988143921, |
| "learning_rate": 8.441373062682856e-05, |
| "loss": 0.0343, |
| "step": 5910 |
| }, |
| { |
| "epoch": 4.088397790055248, |
| "grad_norm": 0.17947819828987122, |
| "learning_rate": 8.43537081070408e-05, |
| "loss": 0.0346, |
| "step": 5920 |
| }, |
| { |
| "epoch": 4.095303867403315, |
| "grad_norm": 0.15115374326705933, |
| "learning_rate": 8.429359166552689e-05, |
| "loss": 0.0319, |
| "step": 5930 |
| }, |
| { |
| "epoch": 4.102209944751381, |
| "grad_norm": 0.18776749074459076, |
| "learning_rate": 8.423338146664284e-05, |
| "loss": 0.0361, |
| "step": 5940 |
| }, |
| { |
| "epoch": 4.109116022099448, |
| "grad_norm": 0.1795329600572586, |
| "learning_rate": 8.417307767500107e-05, |
| "loss": 0.0346, |
| "step": 5950 |
| }, |
| { |
| "epoch": 4.116022099447513, |
| "grad_norm": 0.17658886313438416, |
| "learning_rate": 8.411268045546983e-05, |
| "loss": 0.0318, |
| "step": 5960 |
| }, |
| { |
| "epoch": 4.12292817679558, |
| "grad_norm": 0.18604636192321777, |
| "learning_rate": 8.405218997317281e-05, |
| "loss": 0.0332, |
| "step": 5970 |
| }, |
| { |
| "epoch": 4.129834254143646, |
| "grad_norm": 0.18217170238494873, |
| "learning_rate": 8.399160639348869e-05, |
| "loss": 0.0311, |
| "step": 5980 |
| }, |
| { |
| "epoch": 4.136740331491713, |
| "grad_norm": 0.16615109145641327, |
| "learning_rate": 8.393092988205065e-05, |
| "loss": 0.0324, |
| "step": 5990 |
| }, |
| { |
| "epoch": 4.143646408839779, |
| "grad_norm": 0.2016613483428955, |
| "learning_rate": 8.387016060474597e-05, |
| "loss": 0.0349, |
| "step": 6000 |
| }, |
| { |
| "epoch": 4.150552486187845, |
| "grad_norm": 0.12477770447731018, |
| "learning_rate": 8.380929872771551e-05, |
| "loss": 0.0316, |
| "step": 6010 |
| }, |
| { |
| "epoch": 4.157458563535911, |
| "grad_norm": 0.1528131514787674, |
| "learning_rate": 8.374834441735335e-05, |
| "loss": 0.0354, |
| "step": 6020 |
| }, |
| { |
| "epoch": 4.164364640883978, |
| "grad_norm": 0.10325898975133896, |
| "learning_rate": 8.368729784030622e-05, |
| "loss": 0.0299, |
| "step": 6030 |
| }, |
| { |
| "epoch": 4.171270718232044, |
| "grad_norm": 0.17199422419071198, |
| "learning_rate": 8.362615916347315e-05, |
| "loss": 0.0365, |
| "step": 6040 |
| }, |
| { |
| "epoch": 4.178176795580111, |
| "grad_norm": 0.1270386278629303, |
| "learning_rate": 8.356492855400493e-05, |
| "loss": 0.0311, |
| "step": 6050 |
| }, |
| { |
| "epoch": 4.185082872928176, |
| "grad_norm": 0.14681442081928253, |
| "learning_rate": 8.350360617930371e-05, |
| "loss": 0.0343, |
| "step": 6060 |
| }, |
| { |
| "epoch": 4.191988950276243, |
| "grad_norm": 0.14189831912517548, |
| "learning_rate": 8.344219220702255e-05, |
| "loss": 0.0326, |
| "step": 6070 |
| }, |
| { |
| "epoch": 4.198895027624309, |
| "grad_norm": 0.22745615243911743, |
| "learning_rate": 8.338068680506485e-05, |
| "loss": 0.037, |
| "step": 6080 |
| }, |
| { |
| "epoch": 4.205801104972376, |
| "grad_norm": 0.1944071650505066, |
| "learning_rate": 8.33190901415841e-05, |
| "loss": 0.0377, |
| "step": 6090 |
| }, |
| { |
| "epoch": 4.212707182320442, |
| "grad_norm": 0.14716528356075287, |
| "learning_rate": 8.325740238498317e-05, |
| "loss": 0.0284, |
| "step": 6100 |
| }, |
| { |
| "epoch": 4.219613259668508, |
| "grad_norm": 0.14018702507019043, |
| "learning_rate": 8.319562370391406e-05, |
| "loss": 0.0345, |
| "step": 6110 |
| }, |
| { |
| "epoch": 4.226519337016574, |
| "grad_norm": 0.12675043940544128, |
| "learning_rate": 8.31337542672773e-05, |
| "loss": 0.033, |
| "step": 6120 |
| }, |
| { |
| "epoch": 4.233425414364641, |
| "grad_norm": 0.1785709261894226, |
| "learning_rate": 8.307179424422158e-05, |
| "loss": 0.0378, |
| "step": 6130 |
| }, |
| { |
| "epoch": 4.240331491712707, |
| "grad_norm": 0.15371115505695343, |
| "learning_rate": 8.300974380414327e-05, |
| "loss": 0.0299, |
| "step": 6140 |
| }, |
| { |
| "epoch": 4.247237569060774, |
| "grad_norm": 0.21764731407165527, |
| "learning_rate": 8.294760311668586e-05, |
| "loss": 0.0309, |
| "step": 6150 |
| }, |
| { |
| "epoch": 4.25414364640884, |
| "grad_norm": 0.24038873612880707, |
| "learning_rate": 8.288537235173961e-05, |
| "loss": 0.0353, |
| "step": 6160 |
| }, |
| { |
| "epoch": 4.261049723756906, |
| "grad_norm": 0.17432773113250732, |
| "learning_rate": 8.282305167944108e-05, |
| "loss": 0.0324, |
| "step": 6170 |
| }, |
| { |
| "epoch": 4.267955801104972, |
| "grad_norm": 0.1402132213115692, |
| "learning_rate": 8.276064127017262e-05, |
| "loss": 0.0361, |
| "step": 6180 |
| }, |
| { |
| "epoch": 4.274861878453039, |
| "grad_norm": 0.13318690657615662, |
| "learning_rate": 8.269814129456189e-05, |
| "loss": 0.0369, |
| "step": 6190 |
| }, |
| { |
| "epoch": 4.281767955801105, |
| "grad_norm": 0.11651228368282318, |
| "learning_rate": 8.263555192348143e-05, |
| "loss": 0.0363, |
| "step": 6200 |
| }, |
| { |
| "epoch": 4.288674033149171, |
| "grad_norm": 0.21030980348587036, |
| "learning_rate": 8.257287332804819e-05, |
| "loss": 0.0348, |
| "step": 6210 |
| }, |
| { |
| "epoch": 4.295580110497237, |
| "grad_norm": 0.17713765799999237, |
| "learning_rate": 8.251010567962307e-05, |
| "loss": 0.0339, |
| "step": 6220 |
| }, |
| { |
| "epoch": 4.302486187845304, |
| "grad_norm": 0.1809339076280594, |
| "learning_rate": 8.244724914981041e-05, |
| "loss": 0.0341, |
| "step": 6230 |
| }, |
| { |
| "epoch": 4.30939226519337, |
| "grad_norm": 0.18925215303897858, |
| "learning_rate": 8.238430391045757e-05, |
| "loss": 0.032, |
| "step": 6240 |
| }, |
| { |
| "epoch": 4.316298342541437, |
| "grad_norm": 0.21282361447811127, |
| "learning_rate": 8.232127013365445e-05, |
| "loss": 0.0334, |
| "step": 6250 |
| }, |
| { |
| "epoch": 4.323204419889503, |
| "grad_norm": 0.17454232275485992, |
| "learning_rate": 8.225814799173295e-05, |
| "loss": 0.0367, |
| "step": 6260 |
| }, |
| { |
| "epoch": 4.330110497237569, |
| "grad_norm": 0.22003421187400818, |
| "learning_rate": 8.219493765726663e-05, |
| "loss": 0.0356, |
| "step": 6270 |
| }, |
| { |
| "epoch": 4.337016574585635, |
| "grad_norm": 0.11667243391275406, |
| "learning_rate": 8.21316393030701e-05, |
| "loss": 0.0381, |
| "step": 6280 |
| }, |
| { |
| "epoch": 4.343922651933702, |
| "grad_norm": 0.17640861868858337, |
| "learning_rate": 8.206825310219865e-05, |
| "loss": 0.041, |
| "step": 6290 |
| }, |
| { |
| "epoch": 4.350828729281768, |
| "grad_norm": 0.18771280348300934, |
| "learning_rate": 8.200477922794776e-05, |
| "loss": 0.0297, |
| "step": 6300 |
| }, |
| { |
| "epoch": 4.357734806629834, |
| "grad_norm": 0.15554696321487427, |
| "learning_rate": 8.194121785385256e-05, |
| "loss": 0.035, |
| "step": 6310 |
| }, |
| { |
| "epoch": 4.3646408839779, |
| "grad_norm": 0.1682814359664917, |
| "learning_rate": 8.187756915368741e-05, |
| "loss": 0.0287, |
| "step": 6320 |
| }, |
| { |
| "epoch": 4.371546961325967, |
| "grad_norm": 0.12980349361896515, |
| "learning_rate": 8.181383330146544e-05, |
| "loss": 0.03, |
| "step": 6330 |
| }, |
| { |
| "epoch": 4.378453038674033, |
| "grad_norm": 0.1720675230026245, |
| "learning_rate": 8.175001047143804e-05, |
| "loss": 0.0339, |
| "step": 6340 |
| }, |
| { |
| "epoch": 4.3853591160221, |
| "grad_norm": 0.18164749443531036, |
| "learning_rate": 8.168610083809438e-05, |
| "loss": 0.0302, |
| "step": 6350 |
| }, |
| { |
| "epoch": 4.392265193370166, |
| "grad_norm": 0.1585766226053238, |
| "learning_rate": 8.162210457616095e-05, |
| "loss": 0.0341, |
| "step": 6360 |
| }, |
| { |
| "epoch": 4.399171270718232, |
| "grad_norm": 0.16067397594451904, |
| "learning_rate": 8.155802186060109e-05, |
| "loss": 0.0315, |
| "step": 6370 |
| }, |
| { |
| "epoch": 4.406077348066298, |
| "grad_norm": 0.15979492664337158, |
| "learning_rate": 8.149385286661453e-05, |
| "loss": 0.0321, |
| "step": 6380 |
| }, |
| { |
| "epoch": 4.412983425414365, |
| "grad_norm": 0.11252643913030624, |
| "learning_rate": 8.14295977696368e-05, |
| "loss": 0.0343, |
| "step": 6390 |
| }, |
| { |
| "epoch": 4.419889502762431, |
| "grad_norm": 0.19719359278678894, |
| "learning_rate": 8.13652567453389e-05, |
| "loss": 0.0335, |
| "step": 6400 |
| }, |
| { |
| "epoch": 4.426795580110497, |
| "grad_norm": 0.17754772305488586, |
| "learning_rate": 8.130082996962676e-05, |
| "loss": 0.0354, |
| "step": 6410 |
| }, |
| { |
| "epoch": 4.433701657458563, |
| "grad_norm": 0.15615013241767883, |
| "learning_rate": 8.123631761864068e-05, |
| "loss": 0.0347, |
| "step": 6420 |
| }, |
| { |
| "epoch": 4.44060773480663, |
| "grad_norm": 0.1586446464061737, |
| "learning_rate": 8.1171719868755e-05, |
| "loss": 0.032, |
| "step": 6430 |
| }, |
| { |
| "epoch": 4.447513812154696, |
| "grad_norm": 0.1573350876569748, |
| "learning_rate": 8.110703689657748e-05, |
| "loss": 0.0321, |
| "step": 6440 |
| }, |
| { |
| "epoch": 4.454419889502763, |
| "grad_norm": 0.14814695715904236, |
| "learning_rate": 8.104226887894892e-05, |
| "loss": 0.0303, |
| "step": 6450 |
| }, |
| { |
| "epoch": 4.461325966850829, |
| "grad_norm": 0.16024483740329742, |
| "learning_rate": 8.097741599294257e-05, |
| "loss": 0.031, |
| "step": 6460 |
| }, |
| { |
| "epoch": 4.468232044198895, |
| "grad_norm": 0.19926926493644714, |
| "learning_rate": 8.091247841586378e-05, |
| "loss": 0.0345, |
| "step": 6470 |
| }, |
| { |
| "epoch": 4.475138121546961, |
| "grad_norm": 0.22303733229637146, |
| "learning_rate": 8.084745632524939e-05, |
| "loss": 0.0308, |
| "step": 6480 |
| }, |
| { |
| "epoch": 4.482044198895028, |
| "grad_norm": 0.20035359263420105, |
| "learning_rate": 8.07823498988673e-05, |
| "loss": 0.0317, |
| "step": 6490 |
| }, |
| { |
| "epoch": 4.488950276243094, |
| "grad_norm": 0.17351382970809937, |
| "learning_rate": 8.071715931471602e-05, |
| "loss": 0.032, |
| "step": 6500 |
| }, |
| { |
| "epoch": 4.49585635359116, |
| "grad_norm": 0.2802712619304657, |
| "learning_rate": 8.06518847510241e-05, |
| "loss": 0.0326, |
| "step": 6510 |
| }, |
| { |
| "epoch": 4.502762430939226, |
| "grad_norm": 0.2340390384197235, |
| "learning_rate": 8.058652638624971e-05, |
| "loss": 0.033, |
| "step": 6520 |
| }, |
| { |
| "epoch": 4.509668508287293, |
| "grad_norm": 0.12652724981307983, |
| "learning_rate": 8.052108439908013e-05, |
| "loss": 0.0391, |
| "step": 6530 |
| }, |
| { |
| "epoch": 4.516574585635359, |
| "grad_norm": 0.15997207164764404, |
| "learning_rate": 8.045555896843125e-05, |
| "loss": 0.0332, |
| "step": 6540 |
| }, |
| { |
| "epoch": 4.523480662983426, |
| "grad_norm": 0.19305022060871124, |
| "learning_rate": 8.03899502734471e-05, |
| "loss": 0.0334, |
| "step": 6550 |
| }, |
| { |
| "epoch": 4.530386740331492, |
| "grad_norm": 0.18047112226486206, |
| "learning_rate": 8.032425849349931e-05, |
| "loss": 0.0363, |
| "step": 6560 |
| }, |
| { |
| "epoch": 4.537292817679558, |
| "grad_norm": 0.171234130859375, |
| "learning_rate": 8.025848380818674e-05, |
| "loss": 0.0386, |
| "step": 6570 |
| }, |
| { |
| "epoch": 4.544198895027624, |
| "grad_norm": 0.16757138073444366, |
| "learning_rate": 8.019262639733487e-05, |
| "loss": 0.0402, |
| "step": 6580 |
| }, |
| { |
| "epoch": 4.551104972375691, |
| "grad_norm": 0.18011297285556793, |
| "learning_rate": 8.012668644099531e-05, |
| "loss": 0.0336, |
| "step": 6590 |
| }, |
| { |
| "epoch": 4.558011049723757, |
| "grad_norm": 0.15435227751731873, |
| "learning_rate": 8.006066411944542e-05, |
| "loss": 0.032, |
| "step": 6600 |
| }, |
| { |
| "epoch": 4.564917127071823, |
| "grad_norm": 0.16600175201892853, |
| "learning_rate": 7.999455961318769e-05, |
| "loss": 0.0338, |
| "step": 6610 |
| }, |
| { |
| "epoch": 4.571823204419889, |
| "grad_norm": 0.18113788962364197, |
| "learning_rate": 7.992837310294932e-05, |
| "loss": 0.0306, |
| "step": 6620 |
| }, |
| { |
| "epoch": 4.578729281767956, |
| "grad_norm": 0.15672174096107483, |
| "learning_rate": 7.986210476968167e-05, |
| "loss": 0.0387, |
| "step": 6630 |
| }, |
| { |
| "epoch": 4.585635359116022, |
| "grad_norm": 0.14913024008274078, |
| "learning_rate": 7.97957547945599e-05, |
| "loss": 0.0355, |
| "step": 6640 |
| }, |
| { |
| "epoch": 4.592541436464089, |
| "grad_norm": 0.17059262096881866, |
| "learning_rate": 7.972932335898226e-05, |
| "loss": 0.0314, |
| "step": 6650 |
| }, |
| { |
| "epoch": 4.599447513812155, |
| "grad_norm": 0.15826822817325592, |
| "learning_rate": 7.966281064456975e-05, |
| "loss": 0.0325, |
| "step": 6660 |
| }, |
| { |
| "epoch": 4.606353591160221, |
| "grad_norm": 0.15611670911312103, |
| "learning_rate": 7.959621683316563e-05, |
| "loss": 0.0291, |
| "step": 6670 |
| }, |
| { |
| "epoch": 4.613259668508287, |
| "grad_norm": 0.16045376658439636, |
| "learning_rate": 7.952954210683481e-05, |
| "loss": 0.0356, |
| "step": 6680 |
| }, |
| { |
| "epoch": 4.620165745856354, |
| "grad_norm": 0.11433514207601547, |
| "learning_rate": 7.946278664786345e-05, |
| "loss": 0.031, |
| "step": 6690 |
| }, |
| { |
| "epoch": 4.62707182320442, |
| "grad_norm": 0.18826702237129211, |
| "learning_rate": 7.939595063875842e-05, |
| "loss": 0.0351, |
| "step": 6700 |
| }, |
| { |
| "epoch": 4.633977900552486, |
| "grad_norm": 0.15050998330116272, |
| "learning_rate": 7.932903426224683e-05, |
| "loss": 0.0291, |
| "step": 6710 |
| }, |
| { |
| "epoch": 4.640883977900552, |
| "grad_norm": 0.22147364914417267, |
| "learning_rate": 7.926203770127552e-05, |
| "loss": 0.0337, |
| "step": 6720 |
| }, |
| { |
| "epoch": 4.647790055248619, |
| "grad_norm": 0.22360795736312866, |
| "learning_rate": 7.919496113901046e-05, |
| "loss": 0.0339, |
| "step": 6730 |
| }, |
| { |
| "epoch": 4.654696132596685, |
| "grad_norm": 0.13165533542633057, |
| "learning_rate": 7.912780475883649e-05, |
| "loss": 0.0379, |
| "step": 6740 |
| }, |
| { |
| "epoch": 4.661602209944752, |
| "grad_norm": 0.16516909003257751, |
| "learning_rate": 7.906056874435652e-05, |
| "loss": 0.0296, |
| "step": 6750 |
| }, |
| { |
| "epoch": 4.668508287292818, |
| "grad_norm": 0.1891750693321228, |
| "learning_rate": 7.899325327939131e-05, |
| "loss": 0.028, |
| "step": 6760 |
| }, |
| { |
| "epoch": 4.675414364640884, |
| "grad_norm": 0.16898000240325928, |
| "learning_rate": 7.892585854797872e-05, |
| "loss": 0.031, |
| "step": 6770 |
| }, |
| { |
| "epoch": 4.68232044198895, |
| "grad_norm": 0.1583898961544037, |
| "learning_rate": 7.88583847343734e-05, |
| "loss": 0.0301, |
| "step": 6780 |
| }, |
| { |
| "epoch": 4.689226519337017, |
| "grad_norm": 0.13791148364543915, |
| "learning_rate": 7.879083202304616e-05, |
| "loss": 0.0324, |
| "step": 6790 |
| }, |
| { |
| "epoch": 4.696132596685083, |
| "grad_norm": 0.16449858248233795, |
| "learning_rate": 7.872320059868355e-05, |
| "loss": 0.0373, |
| "step": 6800 |
| }, |
| { |
| "epoch": 4.703038674033149, |
| "grad_norm": 0.12947428226470947, |
| "learning_rate": 7.865549064618729e-05, |
| "loss": 0.0304, |
| "step": 6810 |
| }, |
| { |
| "epoch": 4.709944751381215, |
| "grad_norm": 0.2018185257911682, |
| "learning_rate": 7.858770235067381e-05, |
| "loss": 0.0287, |
| "step": 6820 |
| }, |
| { |
| "epoch": 4.716850828729282, |
| "grad_norm": 0.1298319399356842, |
| "learning_rate": 7.851983589747374e-05, |
| "loss": 0.0275, |
| "step": 6830 |
| }, |
| { |
| "epoch": 4.723756906077348, |
| "grad_norm": 0.13833770155906677, |
| "learning_rate": 7.845189147213133e-05, |
| "loss": 0.0289, |
| "step": 6840 |
| }, |
| { |
| "epoch": 4.730662983425415, |
| "grad_norm": 0.17239126563072205, |
| "learning_rate": 7.838386926040407e-05, |
| "loss": 0.0295, |
| "step": 6850 |
| }, |
| { |
| "epoch": 4.737569060773481, |
| "grad_norm": 0.16230639815330505, |
| "learning_rate": 7.83157694482621e-05, |
| "loss": 0.0323, |
| "step": 6860 |
| }, |
| { |
| "epoch": 4.744475138121547, |
| "grad_norm": 0.14917680621147156, |
| "learning_rate": 7.824759222188768e-05, |
| "loss": 0.0358, |
| "step": 6870 |
| }, |
| { |
| "epoch": 4.751381215469613, |
| "grad_norm": 0.1046120896935463, |
| "learning_rate": 7.817933776767478e-05, |
| "loss": 0.0317, |
| "step": 6880 |
| }, |
| { |
| "epoch": 4.75828729281768, |
| "grad_norm": 0.08659262955188751, |
| "learning_rate": 7.811100627222842e-05, |
| "loss": 0.0349, |
| "step": 6890 |
| }, |
| { |
| "epoch": 4.765193370165746, |
| "grad_norm": 0.16613365709781647, |
| "learning_rate": 7.804259792236435e-05, |
| "loss": 0.0319, |
| "step": 6900 |
| }, |
| { |
| "epoch": 4.7720994475138125, |
| "grad_norm": 0.14178809523582458, |
| "learning_rate": 7.797411290510835e-05, |
| "loss": 0.0359, |
| "step": 6910 |
| }, |
| { |
| "epoch": 4.779005524861878, |
| "grad_norm": 0.13307057321071625, |
| "learning_rate": 7.790555140769586e-05, |
| "loss": 0.0369, |
| "step": 6920 |
| }, |
| { |
| "epoch": 4.785911602209945, |
| "grad_norm": 0.13005660474300385, |
| "learning_rate": 7.78369136175714e-05, |
| "loss": 0.0301, |
| "step": 6930 |
| }, |
| { |
| "epoch": 4.792817679558011, |
| "grad_norm": 0.15029692649841309, |
| "learning_rate": 7.776819972238806e-05, |
| "loss": 0.0288, |
| "step": 6940 |
| }, |
| { |
| "epoch": 4.7997237569060776, |
| "grad_norm": 0.1589047908782959, |
| "learning_rate": 7.7699409910007e-05, |
| "loss": 0.0338, |
| "step": 6950 |
| }, |
| { |
| "epoch": 4.806629834254144, |
| "grad_norm": 0.16635844111442566, |
| "learning_rate": 7.763054436849694e-05, |
| "loss": 0.032, |
| "step": 6960 |
| }, |
| { |
| "epoch": 4.81353591160221, |
| "grad_norm": 0.1771904081106186, |
| "learning_rate": 7.756160328613364e-05, |
| "loss": 0.0293, |
| "step": 6970 |
| }, |
| { |
| "epoch": 4.820441988950276, |
| "grad_norm": 0.1983332484960556, |
| "learning_rate": 7.749258685139942e-05, |
| "loss": 0.0354, |
| "step": 6980 |
| }, |
| { |
| "epoch": 4.827348066298343, |
| "grad_norm": 0.16270354390144348, |
| "learning_rate": 7.742349525298253e-05, |
| "loss": 0.0304, |
| "step": 6990 |
| }, |
| { |
| "epoch": 4.834254143646409, |
| "grad_norm": 0.22154197096824646, |
| "learning_rate": 7.735432867977679e-05, |
| "loss": 0.0327, |
| "step": 7000 |
| }, |
| { |
| "epoch": 4.8411602209944755, |
| "grad_norm": 0.1402016133069992, |
| "learning_rate": 7.728508732088096e-05, |
| "loss": 0.0337, |
| "step": 7010 |
| }, |
| { |
| "epoch": 4.848066298342541, |
| "grad_norm": 0.07997703552246094, |
| "learning_rate": 7.721577136559825e-05, |
| "loss": 0.0287, |
| "step": 7020 |
| }, |
| { |
| "epoch": 4.854972375690608, |
| "grad_norm": 0.16968661546707153, |
| "learning_rate": 7.714638100343588e-05, |
| "loss": 0.0316, |
| "step": 7030 |
| }, |
| { |
| "epoch": 4.861878453038674, |
| "grad_norm": 0.1546269953250885, |
| "learning_rate": 7.707691642410444e-05, |
| "loss": 0.0322, |
| "step": 7040 |
| }, |
| { |
| "epoch": 4.8687845303867405, |
| "grad_norm": 0.13078097999095917, |
| "learning_rate": 7.70073778175174e-05, |
| "loss": 0.0358, |
| "step": 7050 |
| }, |
| { |
| "epoch": 4.875690607734807, |
| "grad_norm": 0.1726975291967392, |
| "learning_rate": 7.69377653737907e-05, |
| "loss": 0.0317, |
| "step": 7060 |
| }, |
| { |
| "epoch": 4.882596685082873, |
| "grad_norm": 0.17169572412967682, |
| "learning_rate": 7.686807928324209e-05, |
| "loss": 0.0319, |
| "step": 7070 |
| }, |
| { |
| "epoch": 4.889502762430939, |
| "grad_norm": 0.17972759902477264, |
| "learning_rate": 7.679831973639065e-05, |
| "loss": 0.031, |
| "step": 7080 |
| }, |
| { |
| "epoch": 4.8964088397790055, |
| "grad_norm": 0.16098080575466156, |
| "learning_rate": 7.672848692395637e-05, |
| "loss": 0.0279, |
| "step": 7090 |
| }, |
| { |
| "epoch": 4.903314917127072, |
| "grad_norm": 0.18605658411979675, |
| "learning_rate": 7.665858103685944e-05, |
| "loss": 0.0316, |
| "step": 7100 |
| }, |
| { |
| "epoch": 4.9102209944751385, |
| "grad_norm": 0.16000963747501373, |
| "learning_rate": 7.658860226621991e-05, |
| "loss": 0.0302, |
| "step": 7110 |
| }, |
| { |
| "epoch": 4.917127071823204, |
| "grad_norm": 0.17880304157733917, |
| "learning_rate": 7.651855080335708e-05, |
| "loss": 0.031, |
| "step": 7120 |
| }, |
| { |
| "epoch": 4.9240331491712706, |
| "grad_norm": 0.15252450108528137, |
| "learning_rate": 7.644842683978896e-05, |
| "loss": 0.0286, |
| "step": 7130 |
| }, |
| { |
| "epoch": 4.930939226519337, |
| "grad_norm": 0.1624738872051239, |
| "learning_rate": 7.63782305672318e-05, |
| "loss": 0.0343, |
| "step": 7140 |
| }, |
| { |
| "epoch": 4.9378453038674035, |
| "grad_norm": 0.1616460680961609, |
| "learning_rate": 7.63079621775995e-05, |
| "loss": 0.0319, |
| "step": 7150 |
| }, |
| { |
| "epoch": 4.94475138121547, |
| "grad_norm": 0.1971554160118103, |
| "learning_rate": 7.623762186300319e-05, |
| "loss": 0.0362, |
| "step": 7160 |
| }, |
| { |
| "epoch": 4.951657458563536, |
| "grad_norm": 0.1854974925518036, |
| "learning_rate": 7.616720981575057e-05, |
| "loss": 0.0321, |
| "step": 7170 |
| }, |
| { |
| "epoch": 4.958563535911602, |
| "grad_norm": 0.1670568883419037, |
| "learning_rate": 7.609672622834552e-05, |
| "loss": 0.029, |
| "step": 7180 |
| }, |
| { |
| "epoch": 4.9654696132596685, |
| "grad_norm": 0.20243513584136963, |
| "learning_rate": 7.602617129348747e-05, |
| "loss": 0.0357, |
| "step": 7190 |
| }, |
| { |
| "epoch": 4.972375690607735, |
| "grad_norm": 0.13823777437210083, |
| "learning_rate": 7.595554520407088e-05, |
| "loss": 0.0307, |
| "step": 7200 |
| }, |
| { |
| "epoch": 4.9792817679558015, |
| "grad_norm": 0.24209170043468475, |
| "learning_rate": 7.588484815318484e-05, |
| "loss": 0.0281, |
| "step": 7210 |
| }, |
| { |
| "epoch": 4.986187845303867, |
| "grad_norm": 0.16390523314476013, |
| "learning_rate": 7.581408033411234e-05, |
| "loss": 0.0368, |
| "step": 7220 |
| }, |
| { |
| "epoch": 4.9930939226519335, |
| "grad_norm": 0.22568488121032715, |
| "learning_rate": 7.574324194032995e-05, |
| "loss": 0.0335, |
| "step": 7230 |
| }, |
| { |
| "epoch": 5.0, |
| "grad_norm": 0.17507368326187134, |
| "learning_rate": 7.567233316550705e-05, |
| "loss": 0.0314, |
| "step": 7240 |
| }, |
| { |
| "epoch": 5.0069060773480665, |
| "grad_norm": 0.1915036290884018, |
| "learning_rate": 7.560135420350562e-05, |
| "loss": 0.0275, |
| "step": 7250 |
| }, |
| { |
| "epoch": 5.013812154696133, |
| "grad_norm": 0.13837654888629913, |
| "learning_rate": 7.553030524837935e-05, |
| "loss": 0.0292, |
| "step": 7260 |
| }, |
| { |
| "epoch": 5.0207182320441985, |
| "grad_norm": 0.11749670654535294, |
| "learning_rate": 7.545918649437341e-05, |
| "loss": 0.0328, |
| "step": 7270 |
| }, |
| { |
| "epoch": 5.027624309392265, |
| "grad_norm": 0.11151426285505295, |
| "learning_rate": 7.538799813592377e-05, |
| "loss": 0.0277, |
| "step": 7280 |
| }, |
| { |
| "epoch": 5.0345303867403315, |
| "grad_norm": 0.13570214807987213, |
| "learning_rate": 7.531674036765662e-05, |
| "loss": 0.0298, |
| "step": 7290 |
| }, |
| { |
| "epoch": 5.041436464088398, |
| "grad_norm": 0.15404485166072845, |
| "learning_rate": 7.524541338438807e-05, |
| "loss": 0.0336, |
| "step": 7300 |
| }, |
| { |
| "epoch": 5.048342541436464, |
| "grad_norm": 0.1855136603116989, |
| "learning_rate": 7.517401738112328e-05, |
| "loss": 0.0326, |
| "step": 7310 |
| }, |
| { |
| "epoch": 5.05524861878453, |
| "grad_norm": 0.1743932068347931, |
| "learning_rate": 7.510255255305628e-05, |
| "loss": 0.0272, |
| "step": 7320 |
| }, |
| { |
| "epoch": 5.0621546961325965, |
| "grad_norm": 0.1550290435552597, |
| "learning_rate": 7.503101909556911e-05, |
| "loss": 0.0314, |
| "step": 7330 |
| }, |
| { |
| "epoch": 5.069060773480663, |
| "grad_norm": 0.14492987096309662, |
| "learning_rate": 7.495941720423154e-05, |
| "loss": 0.0304, |
| "step": 7340 |
| }, |
| { |
| "epoch": 5.0759668508287294, |
| "grad_norm": 0.16522392630577087, |
| "learning_rate": 7.488774707480042e-05, |
| "loss": 0.0296, |
| "step": 7350 |
| }, |
| { |
| "epoch": 5.082872928176796, |
| "grad_norm": 0.2069702297449112, |
| "learning_rate": 7.481600890321911e-05, |
| "loss": 0.0261, |
| "step": 7360 |
| }, |
| { |
| "epoch": 5.0897790055248615, |
| "grad_norm": 0.17909006774425507, |
| "learning_rate": 7.474420288561708e-05, |
| "loss": 0.0306, |
| "step": 7370 |
| }, |
| { |
| "epoch": 5.096685082872928, |
| "grad_norm": 0.1516609787940979, |
| "learning_rate": 7.467232921830921e-05, |
| "loss": 0.0308, |
| "step": 7380 |
| }, |
| { |
| "epoch": 5.1035911602209945, |
| "grad_norm": 0.12364234030246735, |
| "learning_rate": 7.460038809779537e-05, |
| "loss": 0.0321, |
| "step": 7390 |
| }, |
| { |
| "epoch": 5.110497237569061, |
| "grad_norm": 0.16813722252845764, |
| "learning_rate": 7.452837972075983e-05, |
| "loss": 0.0293, |
| "step": 7400 |
| }, |
| { |
| "epoch": 5.117403314917127, |
| "grad_norm": 0.21152564883232117, |
| "learning_rate": 7.445630428407074e-05, |
| "loss": 0.0256, |
| "step": 7410 |
| }, |
| { |
| "epoch": 5.124309392265193, |
| "grad_norm": 0.233183816075325, |
| "learning_rate": 7.43841619847796e-05, |
| "loss": 0.0328, |
| "step": 7420 |
| }, |
| { |
| "epoch": 5.1312154696132595, |
| "grad_norm": 0.16253533959388733, |
| "learning_rate": 7.431195302012072e-05, |
| "loss": 0.028, |
| "step": 7430 |
| }, |
| { |
| "epoch": 5.138121546961326, |
| "grad_norm": 0.11992829293012619, |
| "learning_rate": 7.423967758751061e-05, |
| "loss": 0.0327, |
| "step": 7440 |
| }, |
| { |
| "epoch": 5.145027624309392, |
| "grad_norm": 0.08840538561344147, |
| "learning_rate": 7.416733588454758e-05, |
| "loss": 0.0236, |
| "step": 7450 |
| }, |
| { |
| "epoch": 5.151933701657459, |
| "grad_norm": 0.18914037942886353, |
| "learning_rate": 7.409492810901106e-05, |
| "loss": 0.0306, |
| "step": 7460 |
| }, |
| { |
| "epoch": 5.1588397790055245, |
| "grad_norm": 0.203793466091156, |
| "learning_rate": 7.402245445886116e-05, |
| "loss": 0.0298, |
| "step": 7470 |
| }, |
| { |
| "epoch": 5.165745856353591, |
| "grad_norm": 0.15535494685173035, |
| "learning_rate": 7.394991513223806e-05, |
| "loss": 0.0328, |
| "step": 7480 |
| }, |
| { |
| "epoch": 5.172651933701657, |
| "grad_norm": 0.1621917188167572, |
| "learning_rate": 7.38773103274615e-05, |
| "loss": 0.0327, |
| "step": 7490 |
| }, |
| { |
| "epoch": 5.179558011049724, |
| "grad_norm": 0.15905600786209106, |
| "learning_rate": 7.380464024303028e-05, |
| "loss": 0.0306, |
| "step": 7500 |
| }, |
| { |
| "epoch": 5.18646408839779, |
| "grad_norm": 0.12775392830371857, |
| "learning_rate": 7.373190507762162e-05, |
| "loss": 0.0277, |
| "step": 7510 |
| }, |
| { |
| "epoch": 5.193370165745856, |
| "grad_norm": 0.20729433000087738, |
| "learning_rate": 7.365910503009066e-05, |
| "loss": 0.0305, |
| "step": 7520 |
| }, |
| { |
| "epoch": 5.2002762430939224, |
| "grad_norm": 0.1747429072856903, |
| "learning_rate": 7.358624029946996e-05, |
| "loss": 0.0318, |
| "step": 7530 |
| }, |
| { |
| "epoch": 5.207182320441989, |
| "grad_norm": 0.12930446863174438, |
| "learning_rate": 7.351331108496893e-05, |
| "loss": 0.0264, |
| "step": 7540 |
| }, |
| { |
| "epoch": 5.214088397790055, |
| "grad_norm": 0.16786518692970276, |
| "learning_rate": 7.344031758597325e-05, |
| "loss": 0.0276, |
| "step": 7550 |
| }, |
| { |
| "epoch": 5.220994475138122, |
| "grad_norm": 0.15410704910755157, |
| "learning_rate": 7.336726000204435e-05, |
| "loss": 0.0274, |
| "step": 7560 |
| }, |
| { |
| "epoch": 5.2279005524861875, |
| "grad_norm": 0.11539943516254425, |
| "learning_rate": 7.32941385329189e-05, |
| "loss": 0.0278, |
| "step": 7570 |
| }, |
| { |
| "epoch": 5.234806629834254, |
| "grad_norm": 0.2146443873643875, |
| "learning_rate": 7.322095337850816e-05, |
| "loss": 0.035, |
| "step": 7580 |
| }, |
| { |
| "epoch": 5.24171270718232, |
| "grad_norm": 0.17688392102718353, |
| "learning_rate": 7.314770473889758e-05, |
| "loss": 0.0299, |
| "step": 7590 |
| }, |
| { |
| "epoch": 5.248618784530387, |
| "grad_norm": 0.1647810935974121, |
| "learning_rate": 7.307439281434615e-05, |
| "loss": 0.034, |
| "step": 7600 |
| }, |
| { |
| "epoch": 5.255524861878453, |
| "grad_norm": 0.14570735394954681, |
| "learning_rate": 7.300101780528585e-05, |
| "loss": 0.0278, |
| "step": 7610 |
| }, |
| { |
| "epoch": 5.262430939226519, |
| "grad_norm": 0.14859934151172638, |
| "learning_rate": 7.292757991232117e-05, |
| "loss": 0.0299, |
| "step": 7620 |
| }, |
| { |
| "epoch": 5.269337016574585, |
| "grad_norm": 0.19481755793094635, |
| "learning_rate": 7.285407933622848e-05, |
| "loss": 0.0295, |
| "step": 7630 |
| }, |
| { |
| "epoch": 5.276243093922652, |
| "grad_norm": 0.18291620910167694, |
| "learning_rate": 7.278051627795557e-05, |
| "loss": 0.0309, |
| "step": 7640 |
| }, |
| { |
| "epoch": 5.283149171270718, |
| "grad_norm": 0.135209321975708, |
| "learning_rate": 7.270689093862105e-05, |
| "loss": 0.0291, |
| "step": 7650 |
| }, |
| { |
| "epoch": 5.290055248618785, |
| "grad_norm": 0.15347661077976227, |
| "learning_rate": 7.263320351951374e-05, |
| "loss": 0.029, |
| "step": 7660 |
| }, |
| { |
| "epoch": 5.29696132596685, |
| "grad_norm": 0.17232327163219452, |
| "learning_rate": 7.255945422209227e-05, |
| "loss": 0.0304, |
| "step": 7670 |
| }, |
| { |
| "epoch": 5.303867403314917, |
| "grad_norm": 0.10398121923208237, |
| "learning_rate": 7.248564324798437e-05, |
| "loss": 0.0269, |
| "step": 7680 |
| }, |
| { |
| "epoch": 5.310773480662983, |
| "grad_norm": 0.20927193760871887, |
| "learning_rate": 7.241177079898644e-05, |
| "loss": 0.0303, |
| "step": 7690 |
| }, |
| { |
| "epoch": 5.31767955801105, |
| "grad_norm": 0.15718117356300354, |
| "learning_rate": 7.233783707706295e-05, |
| "loss": 0.0303, |
| "step": 7700 |
| }, |
| { |
| "epoch": 5.324585635359116, |
| "grad_norm": 0.15242931246757507, |
| "learning_rate": 7.226384228434586e-05, |
| "loss": 0.0303, |
| "step": 7710 |
| }, |
| { |
| "epoch": 5.331491712707182, |
| "grad_norm": 0.2130342721939087, |
| "learning_rate": 7.21897866231341e-05, |
| "loss": 0.0268, |
| "step": 7720 |
| }, |
| { |
| "epoch": 5.338397790055248, |
| "grad_norm": 0.1480337530374527, |
| "learning_rate": 7.211567029589303e-05, |
| "loss": 0.0312, |
| "step": 7730 |
| }, |
| { |
| "epoch": 5.345303867403315, |
| "grad_norm": 0.12969496846199036, |
| "learning_rate": 7.204149350525387e-05, |
| "loss": 0.0276, |
| "step": 7740 |
| }, |
| { |
| "epoch": 5.352209944751381, |
| "grad_norm": 0.1786733865737915, |
| "learning_rate": 7.196725645401309e-05, |
| "loss": 0.0285, |
| "step": 7750 |
| }, |
| { |
| "epoch": 5.359116022099448, |
| "grad_norm": 0.1749386489391327, |
| "learning_rate": 7.1892959345132e-05, |
| "loss": 0.0262, |
| "step": 7760 |
| }, |
| { |
| "epoch": 5.366022099447513, |
| "grad_norm": 0.11476799100637436, |
| "learning_rate": 7.181860238173605e-05, |
| "loss": 0.0267, |
| "step": 7770 |
| }, |
| { |
| "epoch": 5.37292817679558, |
| "grad_norm": 0.17516183853149414, |
| "learning_rate": 7.174418576711432e-05, |
| "loss": 0.0302, |
| "step": 7780 |
| }, |
| { |
| "epoch": 5.379834254143646, |
| "grad_norm": 0.18794813752174377, |
| "learning_rate": 7.1669709704719e-05, |
| "loss": 0.0285, |
| "step": 7790 |
| }, |
| { |
| "epoch": 5.386740331491713, |
| "grad_norm": 0.12954457104206085, |
| "learning_rate": 7.159517439816481e-05, |
| "loss": 0.0313, |
| "step": 7800 |
| }, |
| { |
| "epoch": 5.393646408839779, |
| "grad_norm": 0.14184238016605377, |
| "learning_rate": 7.152058005122842e-05, |
| "loss": 0.0278, |
| "step": 7810 |
| }, |
| { |
| "epoch": 5.400552486187845, |
| "grad_norm": 0.1285337507724762, |
| "learning_rate": 7.144592686784793e-05, |
| "loss": 0.0306, |
| "step": 7820 |
| }, |
| { |
| "epoch": 5.407458563535911, |
| "grad_norm": 0.14311516284942627, |
| "learning_rate": 7.137121505212229e-05, |
| "loss": 0.039, |
| "step": 7830 |
| }, |
| { |
| "epoch": 5.414364640883978, |
| "grad_norm": 0.17362003028392792, |
| "learning_rate": 7.129644480831077e-05, |
| "loss": 0.0284, |
| "step": 7840 |
| }, |
| { |
| "epoch": 5.421270718232044, |
| "grad_norm": 0.10782770067453384, |
| "learning_rate": 7.122161634083234e-05, |
| "loss": 0.0284, |
| "step": 7850 |
| }, |
| { |
| "epoch": 5.428176795580111, |
| "grad_norm": 0.1495029479265213, |
| "learning_rate": 7.114672985426516e-05, |
| "loss": 0.0274, |
| "step": 7860 |
| }, |
| { |
| "epoch": 5.435082872928177, |
| "grad_norm": 0.14453409612178802, |
| "learning_rate": 7.107178555334606e-05, |
| "loss": 0.0274, |
| "step": 7870 |
| }, |
| { |
| "epoch": 5.441988950276243, |
| "grad_norm": 0.2002222239971161, |
| "learning_rate": 7.099678364296989e-05, |
| "loss": 0.0337, |
| "step": 7880 |
| }, |
| { |
| "epoch": 5.448895027624309, |
| "grad_norm": 0.1697368025779724, |
| "learning_rate": 7.0921724328189e-05, |
| "loss": 0.0303, |
| "step": 7890 |
| }, |
| { |
| "epoch": 5.455801104972376, |
| "grad_norm": 0.14910706877708435, |
| "learning_rate": 7.084660781421268e-05, |
| "loss": 0.0299, |
| "step": 7900 |
| }, |
| { |
| "epoch": 5.462707182320442, |
| "grad_norm": 0.1361190676689148, |
| "learning_rate": 7.077143430640662e-05, |
| "loss": 0.0316, |
| "step": 7910 |
| }, |
| { |
| "epoch": 5.469613259668508, |
| "grad_norm": 0.172740176320076, |
| "learning_rate": 7.069620401029232e-05, |
| "loss": 0.0323, |
| "step": 7920 |
| }, |
| { |
| "epoch": 5.476519337016574, |
| "grad_norm": 0.2003297507762909, |
| "learning_rate": 7.062091713154655e-05, |
| "loss": 0.0281, |
| "step": 7930 |
| }, |
| { |
| "epoch": 5.483425414364641, |
| "grad_norm": 0.19455312192440033, |
| "learning_rate": 7.054557387600075e-05, |
| "loss": 0.0303, |
| "step": 7940 |
| }, |
| { |
| "epoch": 5.490331491712707, |
| "grad_norm": 0.13216443359851837, |
| "learning_rate": 7.04701744496405e-05, |
| "loss": 0.036, |
| "step": 7950 |
| }, |
| { |
| "epoch": 5.497237569060774, |
| "grad_norm": 0.1445705145597458, |
| "learning_rate": 7.039471905860495e-05, |
| "loss": 0.0339, |
| "step": 7960 |
| }, |
| { |
| "epoch": 5.50414364640884, |
| "grad_norm": 0.15850737690925598, |
| "learning_rate": 7.031920790918628e-05, |
| "loss": 0.0286, |
| "step": 7970 |
| }, |
| { |
| "epoch": 5.511049723756906, |
| "grad_norm": 0.18733613193035126, |
| "learning_rate": 7.024364120782906e-05, |
| "loss": 0.0377, |
| "step": 7980 |
| }, |
| { |
| "epoch": 5.517955801104972, |
| "grad_norm": 0.13512170314788818, |
| "learning_rate": 7.016801916112978e-05, |
| "loss": 0.031, |
| "step": 7990 |
| }, |
| { |
| "epoch": 5.524861878453039, |
| "grad_norm": 0.14883825182914734, |
| "learning_rate": 7.009234197583623e-05, |
| "loss": 0.0305, |
| "step": 8000 |
| }, |
| { |
| "epoch": 5.531767955801105, |
| "grad_norm": 0.14048552513122559, |
| "learning_rate": 7.001660985884692e-05, |
| "loss": 0.0287, |
| "step": 8010 |
| }, |
| { |
| "epoch": 5.538674033149171, |
| "grad_norm": 0.14698736369609833, |
| "learning_rate": 6.994082301721063e-05, |
| "loss": 0.0333, |
| "step": 8020 |
| }, |
| { |
| "epoch": 5.545580110497237, |
| "grad_norm": 0.26427164673805237, |
| "learning_rate": 6.986498165812563e-05, |
| "loss": 0.0319, |
| "step": 8030 |
| }, |
| { |
| "epoch": 5.552486187845304, |
| "grad_norm": 0.12380706518888474, |
| "learning_rate": 6.978908598893932e-05, |
| "loss": 0.0258, |
| "step": 8040 |
| }, |
| { |
| "epoch": 5.55939226519337, |
| "grad_norm": 0.17819596827030182, |
| "learning_rate": 6.971313621714756e-05, |
| "loss": 0.0275, |
| "step": 8050 |
| }, |
| { |
| "epoch": 5.566298342541437, |
| "grad_norm": 0.1824468970298767, |
| "learning_rate": 6.96371325503941e-05, |
| "loss": 0.0293, |
| "step": 8060 |
| }, |
| { |
| "epoch": 5.573204419889503, |
| "grad_norm": 0.12114512920379639, |
| "learning_rate": 6.956107519647014e-05, |
| "loss": 0.0284, |
| "step": 8070 |
| }, |
| { |
| "epoch": 5.580110497237569, |
| "grad_norm": 0.12274518609046936, |
| "learning_rate": 6.94849643633135e-05, |
| "loss": 0.028, |
| "step": 8080 |
| }, |
| { |
| "epoch": 5.587016574585635, |
| "grad_norm": 0.1467324197292328, |
| "learning_rate": 6.940880025900834e-05, |
| "loss": 0.0266, |
| "step": 8090 |
| }, |
| { |
| "epoch": 5.593922651933702, |
| "grad_norm": 0.18122711777687073, |
| "learning_rate": 6.933258309178438e-05, |
| "loss": 0.0239, |
| "step": 8100 |
| }, |
| { |
| "epoch": 5.600828729281768, |
| "grad_norm": 0.14374437928199768, |
| "learning_rate": 6.925631307001646e-05, |
| "loss": 0.0296, |
| "step": 8110 |
| }, |
| { |
| "epoch": 5.607734806629834, |
| "grad_norm": 0.16520264744758606, |
| "learning_rate": 6.91799904022239e-05, |
| "loss": 0.028, |
| "step": 8120 |
| }, |
| { |
| "epoch": 5.6146408839779, |
| "grad_norm": 0.13161131739616394, |
| "learning_rate": 6.910361529706997e-05, |
| "loss": 0.0245, |
| "step": 8130 |
| }, |
| { |
| "epoch": 5.621546961325967, |
| "grad_norm": 0.13930374383926392, |
| "learning_rate": 6.902718796336131e-05, |
| "loss": 0.0299, |
| "step": 8140 |
| }, |
| { |
| "epoch": 5.628453038674033, |
| "grad_norm": 0.12098735570907593, |
| "learning_rate": 6.895070861004729e-05, |
| "loss": 0.0308, |
| "step": 8150 |
| }, |
| { |
| "epoch": 5.6353591160221, |
| "grad_norm": 0.19493934512138367, |
| "learning_rate": 6.887417744621956e-05, |
| "loss": 0.0288, |
| "step": 8160 |
| }, |
| { |
| "epoch": 5.642265193370166, |
| "grad_norm": 0.10348235070705414, |
| "learning_rate": 6.87975946811114e-05, |
| "loss": 0.0274, |
| "step": 8170 |
| }, |
| { |
| "epoch": 5.649171270718232, |
| "grad_norm": 0.19127143919467926, |
| "learning_rate": 6.872096052409718e-05, |
| "loss": 0.0284, |
| "step": 8180 |
| }, |
| { |
| "epoch": 5.656077348066298, |
| "grad_norm": 0.17267738282680511, |
| "learning_rate": 6.864427518469174e-05, |
| "loss": 0.0303, |
| "step": 8190 |
| }, |
| { |
| "epoch": 5.662983425414365, |
| "grad_norm": 0.14508427679538727, |
| "learning_rate": 6.856753887254986e-05, |
| "loss": 0.0262, |
| "step": 8200 |
| }, |
| { |
| "epoch": 5.669889502762431, |
| "grad_norm": 0.15535277128219604, |
| "learning_rate": 6.849075179746572e-05, |
| "loss": 0.0304, |
| "step": 8210 |
| }, |
| { |
| "epoch": 5.676795580110497, |
| "grad_norm": 0.1699511557817459, |
| "learning_rate": 6.841391416937221e-05, |
| "loss": 0.0321, |
| "step": 8220 |
| }, |
| { |
| "epoch": 5.683701657458563, |
| "grad_norm": 0.17711235582828522, |
| "learning_rate": 6.833702619834053e-05, |
| "loss": 0.0276, |
| "step": 8230 |
| }, |
| { |
| "epoch": 5.69060773480663, |
| "grad_norm": 0.21901249885559082, |
| "learning_rate": 6.82600880945794e-05, |
| "loss": 0.029, |
| "step": 8240 |
| }, |
| { |
| "epoch": 5.697513812154696, |
| "grad_norm": 0.11280661821365356, |
| "learning_rate": 6.818310006843468e-05, |
| "loss": 0.0237, |
| "step": 8250 |
| }, |
| { |
| "epoch": 5.704419889502763, |
| "grad_norm": 0.12669578194618225, |
| "learning_rate": 6.810606233038868e-05, |
| "loss": 0.0273, |
| "step": 8260 |
| }, |
| { |
| "epoch": 5.711325966850829, |
| "grad_norm": 0.15255232155323029, |
| "learning_rate": 6.802897509105966e-05, |
| "loss": 0.0331, |
| "step": 8270 |
| }, |
| { |
| "epoch": 5.718232044198895, |
| "grad_norm": 0.17251603305339813, |
| "learning_rate": 6.79518385612012e-05, |
| "loss": 0.0318, |
| "step": 8280 |
| }, |
| { |
| "epoch": 5.725138121546961, |
| "grad_norm": 0.15754754841327667, |
| "learning_rate": 6.787465295170157e-05, |
| "loss": 0.0272, |
| "step": 8290 |
| }, |
| { |
| "epoch": 5.732044198895028, |
| "grad_norm": 0.13025134801864624, |
| "learning_rate": 6.779741847358332e-05, |
| "loss": 0.0246, |
| "step": 8300 |
| }, |
| { |
| "epoch": 5.738950276243094, |
| "grad_norm": 0.1261071413755417, |
| "learning_rate": 6.772013533800256e-05, |
| "loss": 0.0305, |
| "step": 8310 |
| }, |
| { |
| "epoch": 5.74585635359116, |
| "grad_norm": 0.15836410224437714, |
| "learning_rate": 6.764280375624843e-05, |
| "loss": 0.0307, |
| "step": 8320 |
| }, |
| { |
| "epoch": 5.752762430939226, |
| "grad_norm": 0.21966996788978577, |
| "learning_rate": 6.756542393974252e-05, |
| "loss": 0.0309, |
| "step": 8330 |
| }, |
| { |
| "epoch": 5.759668508287293, |
| "grad_norm": 0.16588178277015686, |
| "learning_rate": 6.748799610003828e-05, |
| "loss": 0.0266, |
| "step": 8340 |
| }, |
| { |
| "epoch": 5.766574585635359, |
| "grad_norm": 0.11811785399913788, |
| "learning_rate": 6.741052044882048e-05, |
| "loss": 0.0301, |
| "step": 8350 |
| }, |
| { |
| "epoch": 5.773480662983426, |
| "grad_norm": 0.09797261655330658, |
| "learning_rate": 6.73329971979046e-05, |
| "loss": 0.0263, |
| "step": 8360 |
| }, |
| { |
| "epoch": 5.780386740331492, |
| "grad_norm": 0.12955108284950256, |
| "learning_rate": 6.725542655923625e-05, |
| "loss": 0.0262, |
| "step": 8370 |
| }, |
| { |
| "epoch": 5.787292817679558, |
| "grad_norm": 0.14564071595668793, |
| "learning_rate": 6.717780874489057e-05, |
| "loss": 0.0287, |
| "step": 8380 |
| }, |
| { |
| "epoch": 5.794198895027624, |
| "grad_norm": 0.19383345544338226, |
| "learning_rate": 6.710014396707172e-05, |
| "loss": 0.028, |
| "step": 8390 |
| }, |
| { |
| "epoch": 5.801104972375691, |
| "grad_norm": 0.11382323503494263, |
| "learning_rate": 6.702243243811221e-05, |
| "loss": 0.0218, |
| "step": 8400 |
| }, |
| { |
| "epoch": 5.808011049723757, |
| "grad_norm": 0.15130683779716492, |
| "learning_rate": 6.694467437047244e-05, |
| "loss": 0.0265, |
| "step": 8410 |
| }, |
| { |
| "epoch": 5.814917127071823, |
| "grad_norm": 0.15816499292850494, |
| "learning_rate": 6.686686997673997e-05, |
| "loss": 0.0277, |
| "step": 8420 |
| }, |
| { |
| "epoch": 5.821823204419889, |
| "grad_norm": 0.1593073159456253, |
| "learning_rate": 6.678901946962903e-05, |
| "loss": 0.0262, |
| "step": 8430 |
| }, |
| { |
| "epoch": 5.828729281767956, |
| "grad_norm": 0.11927701532840729, |
| "learning_rate": 6.671112306197996e-05, |
| "loss": 0.024, |
| "step": 8440 |
| }, |
| { |
| "epoch": 5.835635359116022, |
| "grad_norm": 0.1482495218515396, |
| "learning_rate": 6.663318096675854e-05, |
| "loss": 0.0269, |
| "step": 8450 |
| }, |
| { |
| "epoch": 5.842541436464089, |
| "grad_norm": 0.1422957479953766, |
| "learning_rate": 6.655519339705552e-05, |
| "loss": 0.0229, |
| "step": 8460 |
| }, |
| { |
| "epoch": 5.849447513812155, |
| "grad_norm": 0.12634189426898956, |
| "learning_rate": 6.647716056608588e-05, |
| "loss": 0.026, |
| "step": 8470 |
| }, |
| { |
| "epoch": 5.856353591160221, |
| "grad_norm": 0.21049568057060242, |
| "learning_rate": 6.639908268718843e-05, |
| "loss": 0.0302, |
| "step": 8480 |
| }, |
| { |
| "epoch": 5.863259668508287, |
| "grad_norm": 0.12989932298660278, |
| "learning_rate": 6.632095997382514e-05, |
| "loss": 0.0301, |
| "step": 8490 |
| }, |
| { |
| "epoch": 5.870165745856354, |
| "grad_norm": 0.2166806012392044, |
| "learning_rate": 6.624279263958047e-05, |
| "loss": 0.0226, |
| "step": 8500 |
| }, |
| { |
| "epoch": 5.87707182320442, |
| "grad_norm": 0.2074391394853592, |
| "learning_rate": 6.616458089816097e-05, |
| "loss": 0.0307, |
| "step": 8510 |
| }, |
| { |
| "epoch": 5.883977900552486, |
| "grad_norm": 0.15048719942569733, |
| "learning_rate": 6.608632496339454e-05, |
| "loss": 0.0283, |
| "step": 8520 |
| }, |
| { |
| "epoch": 5.890883977900552, |
| "grad_norm": 0.12132376432418823, |
| "learning_rate": 6.600802504922988e-05, |
| "loss": 0.0273, |
| "step": 8530 |
| }, |
| { |
| "epoch": 5.897790055248619, |
| "grad_norm": 0.13046051561832428, |
| "learning_rate": 6.592968136973604e-05, |
| "loss": 0.0263, |
| "step": 8540 |
| }, |
| { |
| "epoch": 5.904696132596685, |
| "grad_norm": 0.16431337594985962, |
| "learning_rate": 6.585129413910159e-05, |
| "loss": 0.0312, |
| "step": 8550 |
| }, |
| { |
| "epoch": 5.911602209944752, |
| "grad_norm": 0.15263038873672485, |
| "learning_rate": 6.577286357163424e-05, |
| "loss": 0.0292, |
| "step": 8560 |
| }, |
| { |
| "epoch": 5.918508287292818, |
| "grad_norm": 0.1429017037153244, |
| "learning_rate": 6.569438988176018e-05, |
| "loss": 0.0295, |
| "step": 8570 |
| }, |
| { |
| "epoch": 5.925414364640884, |
| "grad_norm": 0.1810126006603241, |
| "learning_rate": 6.561587328402347e-05, |
| "loss": 0.0264, |
| "step": 8580 |
| }, |
| { |
| "epoch": 5.93232044198895, |
| "grad_norm": 0.1651168018579483, |
| "learning_rate": 6.553731399308549e-05, |
| "loss": 0.0264, |
| "step": 8590 |
| }, |
| { |
| "epoch": 5.939226519337017, |
| "grad_norm": 0.13501553237438202, |
| "learning_rate": 6.545871222372436e-05, |
| "loss": 0.032, |
| "step": 8600 |
| }, |
| { |
| "epoch": 5.946132596685083, |
| "grad_norm": 0.1565493941307068, |
| "learning_rate": 6.538006819083426e-05, |
| "loss": 0.0269, |
| "step": 8610 |
| }, |
| { |
| "epoch": 5.953038674033149, |
| "grad_norm": 0.16880425810813904, |
| "learning_rate": 6.530138210942505e-05, |
| "loss": 0.0304, |
| "step": 8620 |
| }, |
| { |
| "epoch": 5.959944751381215, |
| "grad_norm": 0.12984171509742737, |
| "learning_rate": 6.522265419462141e-05, |
| "loss": 0.0264, |
| "step": 8630 |
| }, |
| { |
| "epoch": 5.966850828729282, |
| "grad_norm": 0.11698273569345474, |
| "learning_rate": 6.514388466166248e-05, |
| "loss": 0.0278, |
| "step": 8640 |
| }, |
| { |
| "epoch": 5.973756906077348, |
| "grad_norm": 0.20552147924900055, |
| "learning_rate": 6.506507372590119e-05, |
| "loss": 0.0288, |
| "step": 8650 |
| }, |
| { |
| "epoch": 5.980662983425415, |
| "grad_norm": 0.14010268449783325, |
| "learning_rate": 6.498622160280355e-05, |
| "loss": 0.0235, |
| "step": 8660 |
| }, |
| { |
| "epoch": 5.987569060773481, |
| "grad_norm": 0.17830894887447357, |
| "learning_rate": 6.490732850794832e-05, |
| "loss": 0.0301, |
| "step": 8670 |
| }, |
| { |
| "epoch": 5.994475138121547, |
| "grad_norm": 0.1695212870836258, |
| "learning_rate": 6.482839465702616e-05, |
| "loss": 0.0315, |
| "step": 8680 |
| }, |
| { |
| "epoch": 6.001381215469613, |
| "grad_norm": 0.17388391494750977, |
| "learning_rate": 6.474942026583923e-05, |
| "loss": 0.0276, |
| "step": 8690 |
| }, |
| { |
| "epoch": 6.00828729281768, |
| "grad_norm": 0.16767103970050812, |
| "learning_rate": 6.467040555030052e-05, |
| "loss": 0.0259, |
| "step": 8700 |
| }, |
| { |
| "epoch": 6.015193370165746, |
| "grad_norm": 0.13405068218708038, |
| "learning_rate": 6.459135072643321e-05, |
| "loss": 0.0223, |
| "step": 8710 |
| }, |
| { |
| "epoch": 6.0220994475138125, |
| "grad_norm": 0.11968281120061874, |
| "learning_rate": 6.451225601037019e-05, |
| "loss": 0.028, |
| "step": 8720 |
| }, |
| { |
| "epoch": 6.029005524861878, |
| "grad_norm": 0.13727779686450958, |
| "learning_rate": 6.443312161835338e-05, |
| "loss": 0.0285, |
| "step": 8730 |
| }, |
| { |
| "epoch": 6.035911602209945, |
| "grad_norm": 0.140719473361969, |
| "learning_rate": 6.43539477667332e-05, |
| "loss": 0.027, |
| "step": 8740 |
| }, |
| { |
| "epoch": 6.042817679558011, |
| "grad_norm": 0.11544650793075562, |
| "learning_rate": 6.427473467196793e-05, |
| "loss": 0.0303, |
| "step": 8750 |
| }, |
| { |
| "epoch": 6.0497237569060776, |
| "grad_norm": 0.1590787172317505, |
| "learning_rate": 6.419548255062315e-05, |
| "loss": 0.0281, |
| "step": 8760 |
| }, |
| { |
| "epoch": 6.056629834254144, |
| "grad_norm": 0.1701236516237259, |
| "learning_rate": 6.411619161937112e-05, |
| "loss": 0.0282, |
| "step": 8770 |
| }, |
| { |
| "epoch": 6.06353591160221, |
| "grad_norm": 0.19218502938747406, |
| "learning_rate": 6.403686209499022e-05, |
| "loss": 0.0308, |
| "step": 8780 |
| }, |
| { |
| "epoch": 6.070441988950276, |
| "grad_norm": 0.18104462325572968, |
| "learning_rate": 6.395749419436437e-05, |
| "loss": 0.0299, |
| "step": 8790 |
| }, |
| { |
| "epoch": 6.077348066298343, |
| "grad_norm": 0.11846525967121124, |
| "learning_rate": 6.387808813448234e-05, |
| "loss": 0.0236, |
| "step": 8800 |
| }, |
| { |
| "epoch": 6.084254143646409, |
| "grad_norm": 0.11504874378442764, |
| "learning_rate": 6.37986441324373e-05, |
| "loss": 0.0322, |
| "step": 8810 |
| }, |
| { |
| "epoch": 6.0911602209944755, |
| "grad_norm": 0.15801477432250977, |
| "learning_rate": 6.37191624054261e-05, |
| "loss": 0.0251, |
| "step": 8820 |
| }, |
| { |
| "epoch": 6.098066298342541, |
| "grad_norm": 0.2594138979911804, |
| "learning_rate": 6.363964317074872e-05, |
| "loss": 0.0332, |
| "step": 8830 |
| }, |
| { |
| "epoch": 6.104972375690608, |
| "grad_norm": 0.1887638121843338, |
| "learning_rate": 6.356008664580776e-05, |
| "loss": 0.0251, |
| "step": 8840 |
| }, |
| { |
| "epoch": 6.111878453038674, |
| "grad_norm": 0.19078470766544342, |
| "learning_rate": 6.348049304810771e-05, |
| "loss": 0.0272, |
| "step": 8850 |
| }, |
| { |
| "epoch": 6.1187845303867405, |
| "grad_norm": 0.17975382506847382, |
| "learning_rate": 6.340086259525442e-05, |
| "loss": 0.0314, |
| "step": 8860 |
| }, |
| { |
| "epoch": 6.125690607734807, |
| "grad_norm": 0.1428086757659912, |
| "learning_rate": 6.332119550495448e-05, |
| "loss": 0.0259, |
| "step": 8870 |
| }, |
| { |
| "epoch": 6.132596685082873, |
| "grad_norm": 0.14597752690315247, |
| "learning_rate": 6.324149199501473e-05, |
| "loss": 0.0264, |
| "step": 8880 |
| }, |
| { |
| "epoch": 6.139502762430939, |
| "grad_norm": 0.15924973785877228, |
| "learning_rate": 6.316175228334146e-05, |
| "loss": 0.0258, |
| "step": 8890 |
| }, |
| { |
| "epoch": 6.1464088397790055, |
| "grad_norm": 0.15013249218463898, |
| "learning_rate": 6.308197658794003e-05, |
| "loss": 0.0279, |
| "step": 8900 |
| }, |
| { |
| "epoch": 6.153314917127072, |
| "grad_norm": 0.17688699066638947, |
| "learning_rate": 6.300216512691417e-05, |
| "loss": 0.0262, |
| "step": 8910 |
| }, |
| { |
| "epoch": 6.1602209944751385, |
| "grad_norm": 0.15361438691616058, |
| "learning_rate": 6.292231811846532e-05, |
| "loss": 0.0242, |
| "step": 8920 |
| }, |
| { |
| "epoch": 6.167127071823204, |
| "grad_norm": 0.10727053880691528, |
| "learning_rate": 6.284243578089217e-05, |
| "loss": 0.0242, |
| "step": 8930 |
| }, |
| { |
| "epoch": 6.1740331491712706, |
| "grad_norm": 0.17150332033634186, |
| "learning_rate": 6.276251833258999e-05, |
| "loss": 0.0284, |
| "step": 8940 |
| }, |
| { |
| "epoch": 6.180939226519337, |
| "grad_norm": 0.2145872861146927, |
| "learning_rate": 6.268256599205003e-05, |
| "loss": 0.026, |
| "step": 8950 |
| }, |
| { |
| "epoch": 6.1878453038674035, |
| "grad_norm": 0.13672959804534912, |
| "learning_rate": 6.260257897785892e-05, |
| "loss": 0.0257, |
| "step": 8960 |
| }, |
| { |
| "epoch": 6.19475138121547, |
| "grad_norm": 0.1518806666135788, |
| "learning_rate": 6.252255750869811e-05, |
| "loss": 0.0256, |
| "step": 8970 |
| }, |
| { |
| "epoch": 6.201657458563536, |
| "grad_norm": 0.13806003332138062, |
| "learning_rate": 6.244250180334325e-05, |
| "loss": 0.0281, |
| "step": 8980 |
| }, |
| { |
| "epoch": 6.208563535911602, |
| "grad_norm": 0.1294434815645218, |
| "learning_rate": 6.236241208066356e-05, |
| "loss": 0.0294, |
| "step": 8990 |
| }, |
| { |
| "epoch": 6.2154696132596685, |
| "grad_norm": 0.13486751914024353, |
| "learning_rate": 6.228228855962133e-05, |
| "loss": 0.033, |
| "step": 9000 |
| }, |
| { |
| "epoch": 6.222375690607735, |
| "grad_norm": 0.13604870438575745, |
| "learning_rate": 6.220213145927115e-05, |
| "loss": 0.0247, |
| "step": 9010 |
| }, |
| { |
| "epoch": 6.2292817679558015, |
| "grad_norm": 0.22754208743572235, |
| "learning_rate": 6.212194099875951e-05, |
| "loss": 0.0239, |
| "step": 9020 |
| }, |
| { |
| "epoch": 6.236187845303867, |
| "grad_norm": 0.18670392036437988, |
| "learning_rate": 6.204171739732405e-05, |
| "loss": 0.0267, |
| "step": 9030 |
| }, |
| { |
| "epoch": 6.2430939226519335, |
| "grad_norm": 0.16089409589767456, |
| "learning_rate": 6.196146087429303e-05, |
| "loss": 0.0236, |
| "step": 9040 |
| }, |
| { |
| "epoch": 6.25, |
| "grad_norm": 0.11962158232927322, |
| "learning_rate": 6.188117164908474e-05, |
| "loss": 0.028, |
| "step": 9050 |
| }, |
| { |
| "epoch": 6.2569060773480665, |
| "grad_norm": 0.15136407315731049, |
| "learning_rate": 6.180084994120684e-05, |
| "loss": 0.0258, |
| "step": 9060 |
| }, |
| { |
| "epoch": 6.263812154696133, |
| "grad_norm": 0.17362581193447113, |
| "learning_rate": 6.17204959702558e-05, |
| "loss": 0.0235, |
| "step": 9070 |
| }, |
| { |
| "epoch": 6.2707182320441985, |
| "grad_norm": 0.12609721720218658, |
| "learning_rate": 6.164010995591635e-05, |
| "loss": 0.0274, |
| "step": 9080 |
| }, |
| { |
| "epoch": 6.277624309392265, |
| "grad_norm": 0.13596007227897644, |
| "learning_rate": 6.155969211796076e-05, |
| "loss": 0.0263, |
| "step": 9090 |
| }, |
| { |
| "epoch": 6.2845303867403315, |
| "grad_norm": 0.12467209994792938, |
| "learning_rate": 6.147924267624829e-05, |
| "loss": 0.0227, |
| "step": 9100 |
| }, |
| { |
| "epoch": 6.291436464088398, |
| "grad_norm": 0.2607831656932831, |
| "learning_rate": 6.13987618507247e-05, |
| "loss": 0.0256, |
| "step": 9110 |
| }, |
| { |
| "epoch": 6.298342541436464, |
| "grad_norm": 0.12686564028263092, |
| "learning_rate": 6.131824986142147e-05, |
| "loss": 0.0269, |
| "step": 9120 |
| }, |
| { |
| "epoch": 6.30524861878453, |
| "grad_norm": 0.1370326429605484, |
| "learning_rate": 6.123770692845529e-05, |
| "loss": 0.0205, |
| "step": 9130 |
| }, |
| { |
| "epoch": 6.3121546961325965, |
| "grad_norm": 0.18716467916965485, |
| "learning_rate": 6.11571332720275e-05, |
| "loss": 0.024, |
| "step": 9140 |
| }, |
| { |
| "epoch": 6.319060773480663, |
| "grad_norm": 0.1517820656299591, |
| "learning_rate": 6.107652911242336e-05, |
| "loss": 0.0265, |
| "step": 9150 |
| }, |
| { |
| "epoch": 6.3259668508287294, |
| "grad_norm": 0.10027366876602173, |
| "learning_rate": 6.0995894670011586e-05, |
| "loss": 0.03, |
| "step": 9160 |
| }, |
| { |
| "epoch": 6.332872928176796, |
| "grad_norm": 0.16794605553150177, |
| "learning_rate": 6.091523016524368e-05, |
| "loss": 0.0276, |
| "step": 9170 |
| }, |
| { |
| "epoch": 6.3397790055248615, |
| "grad_norm": 0.1418285071849823, |
| "learning_rate": 6.083453581865328e-05, |
| "loss": 0.0284, |
| "step": 9180 |
| }, |
| { |
| "epoch": 6.346685082872928, |
| "grad_norm": 0.15791954100131989, |
| "learning_rate": 6.075381185085568e-05, |
| "loss": 0.0271, |
| "step": 9190 |
| }, |
| { |
| "epoch": 6.3535911602209945, |
| "grad_norm": 0.1225113794207573, |
| "learning_rate": 6.067305848254709e-05, |
| "loss": 0.0258, |
| "step": 9200 |
| }, |
| { |
| "epoch": 6.360497237569061, |
| "grad_norm": 0.08965400606393814, |
| "learning_rate": 6.059227593450418e-05, |
| "loss": 0.0257, |
| "step": 9210 |
| }, |
| { |
| "epoch": 6.367403314917127, |
| "grad_norm": 0.12374383211135864, |
| "learning_rate": 6.051146442758333e-05, |
| "loss": 0.0289, |
| "step": 9220 |
| }, |
| { |
| "epoch": 6.374309392265193, |
| "grad_norm": 0.13927176594734192, |
| "learning_rate": 6.043062418272012e-05, |
| "loss": 0.0249, |
| "step": 9230 |
| }, |
| { |
| "epoch": 6.3812154696132595, |
| "grad_norm": 0.16667596995830536, |
| "learning_rate": 6.0349755420928666e-05, |
| "loss": 0.0257, |
| "step": 9240 |
| }, |
| { |
| "epoch": 6.388121546961326, |
| "grad_norm": 0.10272025316953659, |
| "learning_rate": 6.0268858363301105e-05, |
| "loss": 0.0263, |
| "step": 9250 |
| }, |
| { |
| "epoch": 6.395027624309392, |
| "grad_norm": 0.14296506345272064, |
| "learning_rate": 6.018793323100689e-05, |
| "loss": 0.0266, |
| "step": 9260 |
| }, |
| { |
| "epoch": 6.401933701657459, |
| "grad_norm": 0.19271421432495117, |
| "learning_rate": 6.0106980245292255e-05, |
| "loss": 0.028, |
| "step": 9270 |
| }, |
| { |
| "epoch": 6.4088397790055245, |
| "grad_norm": 0.13540977239608765, |
| "learning_rate": 6.002599962747957e-05, |
| "loss": 0.0247, |
| "step": 9280 |
| }, |
| { |
| "epoch": 6.415745856353591, |
| "grad_norm": 0.18024395406246185, |
| "learning_rate": 5.994499159896673e-05, |
| "loss": 0.0256, |
| "step": 9290 |
| }, |
| { |
| "epoch": 6.422651933701657, |
| "grad_norm": 0.18153737485408783, |
| "learning_rate": 5.9863956381226607e-05, |
| "loss": 0.0278, |
| "step": 9300 |
| }, |
| { |
| "epoch": 6.429558011049724, |
| "grad_norm": 0.12531007826328278, |
| "learning_rate": 5.9782894195806394e-05, |
| "loss": 0.0265, |
| "step": 9310 |
| }, |
| { |
| "epoch": 6.43646408839779, |
| "grad_norm": 0.13979461789131165, |
| "learning_rate": 5.9701805264327004e-05, |
| "loss": 0.0275, |
| "step": 9320 |
| }, |
| { |
| "epoch": 6.443370165745856, |
| "grad_norm": 0.1680983603000641, |
| "learning_rate": 5.96206898084825e-05, |
| "loss": 0.0283, |
| "step": 9330 |
| }, |
| { |
| "epoch": 6.4502762430939224, |
| "grad_norm": 0.1745212972164154, |
| "learning_rate": 5.953954805003942e-05, |
| "loss": 0.0305, |
| "step": 9340 |
| }, |
| { |
| "epoch": 6.457182320441989, |
| "grad_norm": 0.10051245987415314, |
| "learning_rate": 5.945838021083623e-05, |
| "loss": 0.0239, |
| "step": 9350 |
| }, |
| { |
| "epoch": 6.464088397790055, |
| "grad_norm": 0.1596742868423462, |
| "learning_rate": 5.9377186512782714e-05, |
| "loss": 0.0287, |
| "step": 9360 |
| }, |
| { |
| "epoch": 6.470994475138122, |
| "grad_norm": 0.16254925727844238, |
| "learning_rate": 5.929596717785935e-05, |
| "loss": 0.0314, |
| "step": 9370 |
| }, |
| { |
| "epoch": 6.4779005524861875, |
| "grad_norm": 0.17129167914390564, |
| "learning_rate": 5.921472242811668e-05, |
| "loss": 0.029, |
| "step": 9380 |
| }, |
| { |
| "epoch": 6.484806629834254, |
| "grad_norm": 0.1531635820865631, |
| "learning_rate": 5.913345248567475e-05, |
| "loss": 0.0289, |
| "step": 9390 |
| }, |
| { |
| "epoch": 6.49171270718232, |
| "grad_norm": 0.15288211405277252, |
| "learning_rate": 5.905215757272248e-05, |
| "loss": 0.0236, |
| "step": 9400 |
| }, |
| { |
| "epoch": 6.498618784530387, |
| "grad_norm": 0.1294633448123932, |
| "learning_rate": 5.897083791151706e-05, |
| "loss": 0.0247, |
| "step": 9410 |
| }, |
| { |
| "epoch": 6.505524861878453, |
| "grad_norm": 0.12580148875713348, |
| "learning_rate": 5.888949372438336e-05, |
| "loss": 0.0282, |
| "step": 9420 |
| }, |
| { |
| "epoch": 6.512430939226519, |
| "grad_norm": 0.14940418303012848, |
| "learning_rate": 5.8808125233713255e-05, |
| "loss": 0.0218, |
| "step": 9430 |
| }, |
| { |
| "epoch": 6.519337016574585, |
| "grad_norm": 0.17906662821769714, |
| "learning_rate": 5.872673266196509e-05, |
| "loss": 0.0281, |
| "step": 9440 |
| }, |
| { |
| "epoch": 6.526243093922652, |
| "grad_norm": 0.11938751488924026, |
| "learning_rate": 5.864531623166305e-05, |
| "loss": 0.0257, |
| "step": 9450 |
| }, |
| { |
| "epoch": 6.533149171270718, |
| "grad_norm": 0.14027191698551178, |
| "learning_rate": 5.856387616539656e-05, |
| "loss": 0.0276, |
| "step": 9460 |
| }, |
| { |
| "epoch": 6.540055248618785, |
| "grad_norm": 0.1458040028810501, |
| "learning_rate": 5.848241268581967e-05, |
| "loss": 0.024, |
| "step": 9470 |
| }, |
| { |
| "epoch": 6.546961325966851, |
| "grad_norm": 0.1046091765165329, |
| "learning_rate": 5.840092601565037e-05, |
| "loss": 0.0291, |
| "step": 9480 |
| }, |
| { |
| "epoch": 6.553867403314917, |
| "grad_norm": 0.17765271663665771, |
| "learning_rate": 5.8319416377670144e-05, |
| "loss": 0.0265, |
| "step": 9490 |
| }, |
| { |
| "epoch": 6.560773480662983, |
| "grad_norm": 0.11805755645036697, |
| "learning_rate": 5.82378839947232e-05, |
| "loss": 0.0237, |
| "step": 9500 |
| }, |
| { |
| "epoch": 6.56767955801105, |
| "grad_norm": 0.10168580710887909, |
| "learning_rate": 5.815632908971599e-05, |
| "loss": 0.0266, |
| "step": 9510 |
| }, |
| { |
| "epoch": 6.574585635359116, |
| "grad_norm": 0.1571616679430008, |
| "learning_rate": 5.80747518856165e-05, |
| "loss": 0.0281, |
| "step": 9520 |
| }, |
| { |
| "epoch": 6.581491712707182, |
| "grad_norm": 0.11476701498031616, |
| "learning_rate": 5.799315260545367e-05, |
| "loss": 0.0301, |
| "step": 9530 |
| }, |
| { |
| "epoch": 6.588397790055248, |
| "grad_norm": 0.14077112078666687, |
| "learning_rate": 5.791153147231686e-05, |
| "loss": 0.0326, |
| "step": 9540 |
| }, |
| { |
| "epoch": 6.595303867403315, |
| "grad_norm": 0.10888878256082535, |
| "learning_rate": 5.782988870935509e-05, |
| "loss": 0.027, |
| "step": 9550 |
| }, |
| { |
| "epoch": 6.602209944751381, |
| "grad_norm": 0.18977871537208557, |
| "learning_rate": 5.774822453977657e-05, |
| "loss": 0.0281, |
| "step": 9560 |
| }, |
| { |
| "epoch": 6.609116022099448, |
| "grad_norm": 0.16340450942516327, |
| "learning_rate": 5.7666539186848036e-05, |
| "loss": 0.0274, |
| "step": 9570 |
| }, |
| { |
| "epoch": 6.616022099447514, |
| "grad_norm": 0.16446462273597717, |
| "learning_rate": 5.758483287389411e-05, |
| "loss": 0.0276, |
| "step": 9580 |
| }, |
| { |
| "epoch": 6.62292817679558, |
| "grad_norm": 0.11147239059209824, |
| "learning_rate": 5.7503105824296735e-05, |
| "loss": 0.0248, |
| "step": 9590 |
| }, |
| { |
| "epoch": 6.629834254143646, |
| "grad_norm": 0.17083384096622467, |
| "learning_rate": 5.742135826149453e-05, |
| "loss": 0.0254, |
| "step": 9600 |
| }, |
| { |
| "epoch": 6.636740331491713, |
| "grad_norm": 0.09648158401250839, |
| "learning_rate": 5.7339590408982223e-05, |
| "loss": 0.0284, |
| "step": 9610 |
| }, |
| { |
| "epoch": 6.643646408839779, |
| "grad_norm": 0.18214352428913116, |
| "learning_rate": 5.725780249031e-05, |
| "loss": 0.0354, |
| "step": 9620 |
| }, |
| { |
| "epoch": 6.650552486187845, |
| "grad_norm": 0.12843012809753418, |
| "learning_rate": 5.717599472908292e-05, |
| "loss": 0.0308, |
| "step": 9630 |
| }, |
| { |
| "epoch": 6.657458563535911, |
| "grad_norm": 0.1309477984905243, |
| "learning_rate": 5.7094167348960237e-05, |
| "loss": 0.024, |
| "step": 9640 |
| }, |
| { |
| "epoch": 6.664364640883978, |
| "grad_norm": 0.15698851644992828, |
| "learning_rate": 5.7012320573654945e-05, |
| "loss": 0.0267, |
| "step": 9650 |
| }, |
| { |
| "epoch": 6.671270718232044, |
| "grad_norm": 0.14546315371990204, |
| "learning_rate": 5.693045462693295e-05, |
| "loss": 0.0273, |
| "step": 9660 |
| }, |
| { |
| "epoch": 6.678176795580111, |
| "grad_norm": 0.09835139662027359, |
| "learning_rate": 5.684856973261266e-05, |
| "loss": 0.0244, |
| "step": 9670 |
| }, |
| { |
| "epoch": 6.685082872928177, |
| "grad_norm": 0.16771312057971954, |
| "learning_rate": 5.6766666114564215e-05, |
| "loss": 0.0289, |
| "step": 9680 |
| }, |
| { |
| "epoch": 6.691988950276243, |
| "grad_norm": 0.15076804161071777, |
| "learning_rate": 5.668474399670899e-05, |
| "loss": 0.0245, |
| "step": 9690 |
| }, |
| { |
| "epoch": 6.698895027624309, |
| "grad_norm": 0.2206779420375824, |
| "learning_rate": 5.660280360301896e-05, |
| "loss": 0.0242, |
| "step": 9700 |
| }, |
| { |
| "epoch": 6.705801104972376, |
| "grad_norm": 0.15940850973129272, |
| "learning_rate": 5.652084515751599e-05, |
| "loss": 0.0291, |
| "step": 9710 |
| }, |
| { |
| "epoch": 6.712707182320442, |
| "grad_norm": 0.1726212501525879, |
| "learning_rate": 5.643886888427137e-05, |
| "loss": 0.0271, |
| "step": 9720 |
| }, |
| { |
| "epoch": 6.719613259668508, |
| "grad_norm": 0.15121586620807648, |
| "learning_rate": 5.6356875007405074e-05, |
| "loss": 0.0256, |
| "step": 9730 |
| }, |
| { |
| "epoch": 6.726519337016574, |
| "grad_norm": 0.13991855084896088, |
| "learning_rate": 5.627486375108525e-05, |
| "loss": 0.0272, |
| "step": 9740 |
| }, |
| { |
| "epoch": 6.733425414364641, |
| "grad_norm": 0.1048620268702507, |
| "learning_rate": 5.619283533952754e-05, |
| "loss": 0.0253, |
| "step": 9750 |
| }, |
| { |
| "epoch": 6.740331491712707, |
| "grad_norm": 0.15269067883491516, |
| "learning_rate": 5.6110789996994474e-05, |
| "loss": 0.0256, |
| "step": 9760 |
| }, |
| { |
| "epoch": 6.747237569060774, |
| "grad_norm": 0.1541956663131714, |
| "learning_rate": 5.602872794779491e-05, |
| "loss": 0.0259, |
| "step": 9770 |
| }, |
| { |
| "epoch": 6.75414364640884, |
| "grad_norm": 0.1253366470336914, |
| "learning_rate": 5.594664941628334e-05, |
| "loss": 0.0244, |
| "step": 9780 |
| }, |
| { |
| "epoch": 6.761049723756906, |
| "grad_norm": 0.1777985841035843, |
| "learning_rate": 5.5864554626859324e-05, |
| "loss": 0.0278, |
| "step": 9790 |
| }, |
| { |
| "epoch": 6.767955801104972, |
| "grad_norm": 0.17948810756206512, |
| "learning_rate": 5.578244380396691e-05, |
| "loss": 0.0232, |
| "step": 9800 |
| }, |
| { |
| "epoch": 6.774861878453039, |
| "grad_norm": 0.1327214539051056, |
| "learning_rate": 5.570031717209394e-05, |
| "loss": 0.0261, |
| "step": 9810 |
| }, |
| { |
| "epoch": 6.781767955801105, |
| "grad_norm": 0.22230222821235657, |
| "learning_rate": 5.561817495577147e-05, |
| "loss": 0.0244, |
| "step": 9820 |
| }, |
| { |
| "epoch": 6.788674033149171, |
| "grad_norm": 0.15005578100681305, |
| "learning_rate": 5.5536017379573215e-05, |
| "loss": 0.0288, |
| "step": 9830 |
| }, |
| { |
| "epoch": 6.795580110497237, |
| "grad_norm": 0.1266830712556839, |
| "learning_rate": 5.545384466811483e-05, |
| "loss": 0.0277, |
| "step": 9840 |
| }, |
| { |
| "epoch": 6.802486187845304, |
| "grad_norm": 0.16804485023021698, |
| "learning_rate": 5.5371657046053384e-05, |
| "loss": 0.0296, |
| "step": 9850 |
| }, |
| { |
| "epoch": 6.80939226519337, |
| "grad_norm": 0.16403374075889587, |
| "learning_rate": 5.528945473808669e-05, |
| "loss": 0.0279, |
| "step": 9860 |
| }, |
| { |
| "epoch": 6.816298342541437, |
| "grad_norm": 0.13253077864646912, |
| "learning_rate": 5.520723796895272e-05, |
| "loss": 0.0211, |
| "step": 9870 |
| }, |
| { |
| "epoch": 6.823204419889503, |
| "grad_norm": 0.13611510396003723, |
| "learning_rate": 5.512500696342897e-05, |
| "loss": 0.0246, |
| "step": 9880 |
| }, |
| { |
| "epoch": 6.830110497237569, |
| "grad_norm": 0.16444122791290283, |
| "learning_rate": 5.504276194633188e-05, |
| "loss": 0.0293, |
| "step": 9890 |
| }, |
| { |
| "epoch": 6.837016574585635, |
| "grad_norm": 0.1384178102016449, |
| "learning_rate": 5.49605031425162e-05, |
| "loss": 0.027, |
| "step": 9900 |
| }, |
| { |
| "epoch": 6.843922651933702, |
| "grad_norm": 0.11369351297616959, |
| "learning_rate": 5.487823077687434e-05, |
| "loss": 0.0261, |
| "step": 9910 |
| }, |
| { |
| "epoch": 6.850828729281768, |
| "grad_norm": 0.1023038923740387, |
| "learning_rate": 5.4795945074335806e-05, |
| "loss": 0.0307, |
| "step": 9920 |
| }, |
| { |
| "epoch": 6.857734806629834, |
| "grad_norm": 0.10871675610542297, |
| "learning_rate": 5.471364625986657e-05, |
| "loss": 0.0215, |
| "step": 9930 |
| }, |
| { |
| "epoch": 6.8646408839779, |
| "grad_norm": 0.16438175737857819, |
| "learning_rate": 5.463133455846845e-05, |
| "loss": 0.0291, |
| "step": 9940 |
| }, |
| { |
| "epoch": 6.871546961325967, |
| "grad_norm": 0.10552588105201721, |
| "learning_rate": 5.4549010195178505e-05, |
| "loss": 0.0283, |
| "step": 9950 |
| }, |
| { |
| "epoch": 6.878453038674033, |
| "grad_norm": 0.15607544779777527, |
| "learning_rate": 5.446667339506838e-05, |
| "loss": 0.0272, |
| "step": 9960 |
| }, |
| { |
| "epoch": 6.8853591160221, |
| "grad_norm": 0.17612875998020172, |
| "learning_rate": 5.4384324383243756e-05, |
| "loss": 0.0251, |
| "step": 9970 |
| }, |
| { |
| "epoch": 6.892265193370166, |
| "grad_norm": 0.1139112040400505, |
| "learning_rate": 5.430196338484368e-05, |
| "loss": 0.0251, |
| "step": 9980 |
| }, |
| { |
| "epoch": 6.899171270718232, |
| "grad_norm": 0.17247068881988525, |
| "learning_rate": 5.4219590625039975e-05, |
| "loss": 0.0265, |
| "step": 9990 |
| }, |
| { |
| "epoch": 6.906077348066298, |
| "grad_norm": 0.22626756131649017, |
| "learning_rate": 5.413720632903664e-05, |
| "loss": 0.029, |
| "step": 10000 |
| }, |
| { |
| "epoch": 6.912983425414365, |
| "grad_norm": 0.1493772268295288, |
| "learning_rate": 5.405481072206917e-05, |
| "loss": 0.0247, |
| "step": 10010 |
| }, |
| { |
| "epoch": 6.919889502762431, |
| "grad_norm": 0.11201240867376328, |
| "learning_rate": 5.397240402940402e-05, |
| "loss": 0.0267, |
| "step": 10020 |
| }, |
| { |
| "epoch": 6.926795580110497, |
| "grad_norm": 0.12302827835083008, |
| "learning_rate": 5.388998647633794e-05, |
| "loss": 0.024, |
| "step": 10030 |
| }, |
| { |
| "epoch": 6.933701657458563, |
| "grad_norm": 0.15909771621227264, |
| "learning_rate": 5.380755828819737e-05, |
| "loss": 0.0279, |
| "step": 10040 |
| }, |
| { |
| "epoch": 6.94060773480663, |
| "grad_norm": 0.1554439663887024, |
| "learning_rate": 5.3725119690337846e-05, |
| "loss": 0.0276, |
| "step": 10050 |
| }, |
| { |
| "epoch": 6.947513812154696, |
| "grad_norm": 0.15158556401729584, |
| "learning_rate": 5.3642670908143324e-05, |
| "loss": 0.0248, |
| "step": 10060 |
| }, |
| { |
| "epoch": 6.954419889502763, |
| "grad_norm": 0.1534639447927475, |
| "learning_rate": 5.356021216702562e-05, |
| "loss": 0.0274, |
| "step": 10070 |
| }, |
| { |
| "epoch": 6.961325966850829, |
| "grad_norm": 0.16105881333351135, |
| "learning_rate": 5.347774369242381e-05, |
| "loss": 0.0264, |
| "step": 10080 |
| }, |
| { |
| "epoch": 6.968232044198895, |
| "grad_norm": 0.11113212257623672, |
| "learning_rate": 5.3395265709803545e-05, |
| "loss": 0.0216, |
| "step": 10090 |
| }, |
| { |
| "epoch": 6.975138121546961, |
| "grad_norm": 0.11615071445703506, |
| "learning_rate": 5.331277844465647e-05, |
| "loss": 0.0225, |
| "step": 10100 |
| }, |
| { |
| "epoch": 6.982044198895028, |
| "grad_norm": 0.12254151701927185, |
| "learning_rate": 5.323028212249963e-05, |
| "loss": 0.0269, |
| "step": 10110 |
| }, |
| { |
| "epoch": 6.988950276243094, |
| "grad_norm": 0.1819300502538681, |
| "learning_rate": 5.314777696887481e-05, |
| "loss": 0.0212, |
| "step": 10120 |
| }, |
| { |
| "epoch": 6.99585635359116, |
| "grad_norm": 0.1481214165687561, |
| "learning_rate": 5.306526320934796e-05, |
| "loss": 0.0254, |
| "step": 10130 |
| }, |
| { |
| "epoch": 7.002762430939226, |
| "grad_norm": 0.12868042290210724, |
| "learning_rate": 5.298274106950854e-05, |
| "loss": 0.0229, |
| "step": 10140 |
| }, |
| { |
| "epoch": 7.009668508287293, |
| "grad_norm": 0.11328385025262833, |
| "learning_rate": 5.290021077496893e-05, |
| "loss": 0.0232, |
| "step": 10150 |
| }, |
| { |
| "epoch": 7.016574585635359, |
| "grad_norm": 0.18687033653259277, |
| "learning_rate": 5.2817672551363816e-05, |
| "loss": 0.0234, |
| "step": 10160 |
| }, |
| { |
| "epoch": 7.023480662983426, |
| "grad_norm": 0.12744495272636414, |
| "learning_rate": 5.273512662434952e-05, |
| "loss": 0.0261, |
| "step": 10170 |
| }, |
| { |
| "epoch": 7.030386740331492, |
| "grad_norm": 0.14903679490089417, |
| "learning_rate": 5.265257321960349e-05, |
| "loss": 0.0223, |
| "step": 10180 |
| }, |
| { |
| "epoch": 7.037292817679558, |
| "grad_norm": 0.17800362408161163, |
| "learning_rate": 5.257001256282357e-05, |
| "loss": 0.0285, |
| "step": 10190 |
| }, |
| { |
| "epoch": 7.044198895027624, |
| "grad_norm": 0.16140541434288025, |
| "learning_rate": 5.248744487972742e-05, |
| "loss": 0.0254, |
| "step": 10200 |
| }, |
| { |
| "epoch": 7.051104972375691, |
| "grad_norm": 0.15151412785053253, |
| "learning_rate": 5.240487039605196e-05, |
| "loss": 0.0239, |
| "step": 10210 |
| }, |
| { |
| "epoch": 7.058011049723757, |
| "grad_norm": 0.08070018887519836, |
| "learning_rate": 5.232228933755267e-05, |
| "loss": 0.0234, |
| "step": 10220 |
| }, |
| { |
| "epoch": 7.064917127071824, |
| "grad_norm": 0.148824080824852, |
| "learning_rate": 5.2239701930003006e-05, |
| "loss": 0.0277, |
| "step": 10230 |
| }, |
| { |
| "epoch": 7.071823204419889, |
| "grad_norm": 0.14293543994426727, |
| "learning_rate": 5.215710839919379e-05, |
| "loss": 0.0253, |
| "step": 10240 |
| }, |
| { |
| "epoch": 7.078729281767956, |
| "grad_norm": 0.14560624957084656, |
| "learning_rate": 5.207450897093257e-05, |
| "loss": 0.0224, |
| "step": 10250 |
| }, |
| { |
| "epoch": 7.085635359116022, |
| "grad_norm": 0.15720799565315247, |
| "learning_rate": 5.1991903871043046e-05, |
| "loss": 0.0254, |
| "step": 10260 |
| }, |
| { |
| "epoch": 7.092541436464089, |
| "grad_norm": 0.11100403964519501, |
| "learning_rate": 5.190929332536439e-05, |
| "loss": 0.0287, |
| "step": 10270 |
| }, |
| { |
| "epoch": 7.099447513812155, |
| "grad_norm": 0.15021225810050964, |
| "learning_rate": 5.182667755975071e-05, |
| "loss": 0.0278, |
| "step": 10280 |
| }, |
| { |
| "epoch": 7.106353591160221, |
| "grad_norm": 0.13029877841472626, |
| "learning_rate": 5.1744056800070315e-05, |
| "loss": 0.0257, |
| "step": 10290 |
| }, |
| { |
| "epoch": 7.113259668508287, |
| "grad_norm": 0.22811639308929443, |
| "learning_rate": 5.166143127220524e-05, |
| "loss": 0.0262, |
| "step": 10300 |
| }, |
| { |
| "epoch": 7.120165745856354, |
| "grad_norm": 0.15374548733234406, |
| "learning_rate": 5.1578801202050485e-05, |
| "loss": 0.0239, |
| "step": 10310 |
| }, |
| { |
| "epoch": 7.12707182320442, |
| "grad_norm": 0.13846486806869507, |
| "learning_rate": 5.149616681551355e-05, |
| "loss": 0.0285, |
| "step": 10320 |
| }, |
| { |
| "epoch": 7.133977900552487, |
| "grad_norm": 0.13904505968093872, |
| "learning_rate": 5.141352833851367e-05, |
| "loss": 0.0242, |
| "step": 10330 |
| }, |
| { |
| "epoch": 7.140883977900552, |
| "grad_norm": 0.1465989053249359, |
| "learning_rate": 5.1330885996981285e-05, |
| "loss": 0.0227, |
| "step": 10340 |
| }, |
| { |
| "epoch": 7.147790055248619, |
| "grad_norm": 0.1020495742559433, |
| "learning_rate": 5.124824001685741e-05, |
| "loss": 0.0236, |
| "step": 10350 |
| }, |
| { |
| "epoch": 7.154696132596685, |
| "grad_norm": 0.10960061103105545, |
| "learning_rate": 5.116559062409298e-05, |
| "loss": 0.0228, |
| "step": 10360 |
| }, |
| { |
| "epoch": 7.161602209944752, |
| "grad_norm": 0.1321902722120285, |
| "learning_rate": 5.10829380446483e-05, |
| "loss": 0.0222, |
| "step": 10370 |
| }, |
| { |
| "epoch": 7.168508287292818, |
| "grad_norm": 0.12457681447267532, |
| "learning_rate": 5.100028250449235e-05, |
| "loss": 0.0239, |
| "step": 10380 |
| }, |
| { |
| "epoch": 7.175414364640884, |
| "grad_norm": 0.20130808651447296, |
| "learning_rate": 5.0917624229602234e-05, |
| "loss": 0.0249, |
| "step": 10390 |
| }, |
| { |
| "epoch": 7.18232044198895, |
| "grad_norm": 0.19484300911426544, |
| "learning_rate": 5.0834963445962524e-05, |
| "loss": 0.0261, |
| "step": 10400 |
| }, |
| { |
| "epoch": 7.189226519337017, |
| "grad_norm": 0.12139289826154709, |
| "learning_rate": 5.075230037956461e-05, |
| "loss": 0.0227, |
| "step": 10410 |
| }, |
| { |
| "epoch": 7.196132596685083, |
| "grad_norm": 0.1364196538925171, |
| "learning_rate": 5.0669635256406213e-05, |
| "loss": 0.0229, |
| "step": 10420 |
| }, |
| { |
| "epoch": 7.20303867403315, |
| "grad_norm": 0.12963388860225677, |
| "learning_rate": 5.058696830249058e-05, |
| "loss": 0.0203, |
| "step": 10430 |
| }, |
| { |
| "epoch": 7.209944751381215, |
| "grad_norm": 0.14057807624340057, |
| "learning_rate": 5.050429974382602e-05, |
| "loss": 0.0254, |
| "step": 10440 |
| }, |
| { |
| "epoch": 7.216850828729282, |
| "grad_norm": 0.14869332313537598, |
| "learning_rate": 5.042162980642523e-05, |
| "loss": 0.0235, |
| "step": 10450 |
| }, |
| { |
| "epoch": 7.223756906077348, |
| "grad_norm": 0.17959578335285187, |
| "learning_rate": 5.033895871630462e-05, |
| "loss": 0.0227, |
| "step": 10460 |
| }, |
| { |
| "epoch": 7.230662983425415, |
| "grad_norm": 0.12166395783424377, |
| "learning_rate": 5.025628669948386e-05, |
| "loss": 0.0259, |
| "step": 10470 |
| }, |
| { |
| "epoch": 7.237569060773481, |
| "grad_norm": 0.13333919644355774, |
| "learning_rate": 5.017361398198502e-05, |
| "loss": 0.0239, |
| "step": 10480 |
| }, |
| { |
| "epoch": 7.244475138121547, |
| "grad_norm": 0.17320550978183746, |
| "learning_rate": 5.009094078983221e-05, |
| "loss": 0.0228, |
| "step": 10490 |
| }, |
| { |
| "epoch": 7.251381215469613, |
| "grad_norm": 0.11817017942667007, |
| "learning_rate": 5.000826734905073e-05, |
| "loss": 0.0207, |
| "step": 10500 |
| }, |
| { |
| "epoch": 7.25828729281768, |
| "grad_norm": 0.15460477769374847, |
| "learning_rate": 4.9925593885666645e-05, |
| "loss": 0.0262, |
| "step": 10510 |
| }, |
| { |
| "epoch": 7.265193370165746, |
| "grad_norm": 0.19087368249893188, |
| "learning_rate": 4.984292062570602e-05, |
| "loss": 0.0266, |
| "step": 10520 |
| }, |
| { |
| "epoch": 7.2720994475138125, |
| "grad_norm": 0.12094233930110931, |
| "learning_rate": 4.976024779519442e-05, |
| "loss": 0.0243, |
| "step": 10530 |
| }, |
| { |
| "epoch": 7.279005524861878, |
| "grad_norm": 0.14833706617355347, |
| "learning_rate": 4.9677575620156194e-05, |
| "loss": 0.0244, |
| "step": 10540 |
| }, |
| { |
| "epoch": 7.285911602209945, |
| "grad_norm": 0.1710749715566635, |
| "learning_rate": 4.959490432661391e-05, |
| "loss": 0.0222, |
| "step": 10550 |
| }, |
| { |
| "epoch": 7.292817679558011, |
| "grad_norm": 0.19592711329460144, |
| "learning_rate": 4.9512234140587726e-05, |
| "loss": 0.0231, |
| "step": 10560 |
| }, |
| { |
| "epoch": 7.2997237569060776, |
| "grad_norm": 0.1926083117723465, |
| "learning_rate": 4.942956528809477e-05, |
| "loss": 0.0262, |
| "step": 10570 |
| }, |
| { |
| "epoch": 7.306629834254144, |
| "grad_norm": 0.11659876257181168, |
| "learning_rate": 4.934689799514854e-05, |
| "loss": 0.0236, |
| "step": 10580 |
| }, |
| { |
| "epoch": 7.31353591160221, |
| "grad_norm": 0.1698133498430252, |
| "learning_rate": 4.926423248775827e-05, |
| "loss": 0.0256, |
| "step": 10590 |
| }, |
| { |
| "epoch": 7.320441988950276, |
| "grad_norm": 0.12696529924869537, |
| "learning_rate": 4.918156899192826e-05, |
| "loss": 0.0256, |
| "step": 10600 |
| }, |
| { |
| "epoch": 7.327348066298343, |
| "grad_norm": 0.18490107357501984, |
| "learning_rate": 4.909890773365738e-05, |
| "loss": 0.0257, |
| "step": 10610 |
| }, |
| { |
| "epoch": 7.334254143646409, |
| "grad_norm": 0.1368025690317154, |
| "learning_rate": 4.9016248938938344e-05, |
| "loss": 0.0219, |
| "step": 10620 |
| }, |
| { |
| "epoch": 7.3411602209944755, |
| "grad_norm": 0.17904599010944366, |
| "learning_rate": 4.8933592833757156e-05, |
| "loss": 0.0259, |
| "step": 10630 |
| }, |
| { |
| "epoch": 7.348066298342541, |
| "grad_norm": 0.1488611102104187, |
| "learning_rate": 4.8850939644092435e-05, |
| "loss": 0.023, |
| "step": 10640 |
| }, |
| { |
| "epoch": 7.354972375690608, |
| "grad_norm": 0.16605186462402344, |
| "learning_rate": 4.876828959591485e-05, |
| "loss": 0.0236, |
| "step": 10650 |
| }, |
| { |
| "epoch": 7.361878453038674, |
| "grad_norm": 0.1492478996515274, |
| "learning_rate": 4.8685642915186474e-05, |
| "loss": 0.0237, |
| "step": 10660 |
| }, |
| { |
| "epoch": 7.3687845303867405, |
| "grad_norm": 0.18098857998847961, |
| "learning_rate": 4.860299982786018e-05, |
| "loss": 0.0228, |
| "step": 10670 |
| }, |
| { |
| "epoch": 7.375690607734807, |
| "grad_norm": 0.19155803322792053, |
| "learning_rate": 4.852036055987901e-05, |
| "loss": 0.0265, |
| "step": 10680 |
| }, |
| { |
| "epoch": 7.382596685082873, |
| "grad_norm": 0.11117758601903915, |
| "learning_rate": 4.843772533717558e-05, |
| "loss": 0.0202, |
| "step": 10690 |
| }, |
| { |
| "epoch": 7.389502762430939, |
| "grad_norm": 0.16134271025657654, |
| "learning_rate": 4.835509438567142e-05, |
| "loss": 0.026, |
| "step": 10700 |
| }, |
| { |
| "epoch": 7.3964088397790055, |
| "grad_norm": 0.1662876456975937, |
| "learning_rate": 4.827246793127639e-05, |
| "loss": 0.0229, |
| "step": 10710 |
| }, |
| { |
| "epoch": 7.403314917127072, |
| "grad_norm": 0.17498347163200378, |
| "learning_rate": 4.818984619988807e-05, |
| "loss": 0.0271, |
| "step": 10720 |
| }, |
| { |
| "epoch": 7.4102209944751385, |
| "grad_norm": 0.14823921024799347, |
| "learning_rate": 4.810722941739115e-05, |
| "loss": 0.0209, |
| "step": 10730 |
| }, |
| { |
| "epoch": 7.417127071823204, |
| "grad_norm": 0.15792728960514069, |
| "learning_rate": 4.8024617809656684e-05, |
| "loss": 0.0223, |
| "step": 10740 |
| }, |
| { |
| "epoch": 7.4240331491712706, |
| "grad_norm": 0.1337282657623291, |
| "learning_rate": 4.794201160254171e-05, |
| "loss": 0.0247, |
| "step": 10750 |
| }, |
| { |
| "epoch": 7.430939226519337, |
| "grad_norm": 0.17503942549228668, |
| "learning_rate": 4.785941102188844e-05, |
| "loss": 0.0239, |
| "step": 10760 |
| }, |
| { |
| "epoch": 7.4378453038674035, |
| "grad_norm": 0.1897234469652176, |
| "learning_rate": 4.7776816293523686e-05, |
| "loss": 0.0256, |
| "step": 10770 |
| }, |
| { |
| "epoch": 7.44475138121547, |
| "grad_norm": 0.1384185552597046, |
| "learning_rate": 4.769422764325832e-05, |
| "loss": 0.0254, |
| "step": 10780 |
| }, |
| { |
| "epoch": 7.451657458563536, |
| "grad_norm": 0.1439378261566162, |
| "learning_rate": 4.76116452968865e-05, |
| "loss": 0.0263, |
| "step": 10790 |
| }, |
| { |
| "epoch": 7.458563535911602, |
| "grad_norm": 0.1375391185283661, |
| "learning_rate": 4.752906948018525e-05, |
| "loss": 0.0231, |
| "step": 10800 |
| }, |
| { |
| "epoch": 7.4654696132596685, |
| "grad_norm": 0.0991685688495636, |
| "learning_rate": 4.7446500418913684e-05, |
| "loss": 0.0238, |
| "step": 10810 |
| }, |
| { |
| "epoch": 7.472375690607735, |
| "grad_norm": 0.12018142640590668, |
| "learning_rate": 4.736393833881247e-05, |
| "loss": 0.0249, |
| "step": 10820 |
| }, |
| { |
| "epoch": 7.4792817679558015, |
| "grad_norm": 0.15268614888191223, |
| "learning_rate": 4.7281383465603194e-05, |
| "loss": 0.0278, |
| "step": 10830 |
| }, |
| { |
| "epoch": 7.486187845303867, |
| "grad_norm": 0.15701478719711304, |
| "learning_rate": 4.71988360249877e-05, |
| "loss": 0.0267, |
| "step": 10840 |
| }, |
| { |
| "epoch": 7.4930939226519335, |
| "grad_norm": 0.18149255216121674, |
| "learning_rate": 4.7116296242647554e-05, |
| "loss": 0.0252, |
| "step": 10850 |
| }, |
| { |
| "epoch": 7.5, |
| "grad_norm": 0.17894303798675537, |
| "learning_rate": 4.703376434424336e-05, |
| "loss": 0.0192, |
| "step": 10860 |
| }, |
| { |
| "epoch": 7.5069060773480665, |
| "grad_norm": 0.11874563992023468, |
| "learning_rate": 4.695124055541421e-05, |
| "loss": 0.0235, |
| "step": 10870 |
| }, |
| { |
| "epoch": 7.513812154696133, |
| "grad_norm": 0.12064361572265625, |
| "learning_rate": 4.6868725101776934e-05, |
| "loss": 0.0219, |
| "step": 10880 |
| }, |
| { |
| "epoch": 7.5207182320441985, |
| "grad_norm": 0.1320265680551529, |
| "learning_rate": 4.678621820892567e-05, |
| "loss": 0.0225, |
| "step": 10890 |
| }, |
| { |
| "epoch": 7.527624309392265, |
| "grad_norm": 0.13714221119880676, |
| "learning_rate": 4.670372010243111e-05, |
| "loss": 0.0249, |
| "step": 10900 |
| }, |
| { |
| "epoch": 7.5345303867403315, |
| "grad_norm": 0.11703638732433319, |
| "learning_rate": 4.662123100783992e-05, |
| "loss": 0.0236, |
| "step": 10910 |
| }, |
| { |
| "epoch": 7.541436464088398, |
| "grad_norm": 0.15061037242412567, |
| "learning_rate": 4.653875115067415e-05, |
| "loss": 0.0216, |
| "step": 10920 |
| }, |
| { |
| "epoch": 7.548342541436464, |
| "grad_norm": 0.16458094120025635, |
| "learning_rate": 4.6456280756430545e-05, |
| "loss": 0.0263, |
| "step": 10930 |
| }, |
| { |
| "epoch": 7.55524861878453, |
| "grad_norm": 0.13051296770572662, |
| "learning_rate": 4.637382005058004e-05, |
| "loss": 0.0231, |
| "step": 10940 |
| }, |
| { |
| "epoch": 7.5621546961325965, |
| "grad_norm": 0.09485608339309692, |
| "learning_rate": 4.629136925856705e-05, |
| "loss": 0.0235, |
| "step": 10950 |
| }, |
| { |
| "epoch": 7.569060773480663, |
| "grad_norm": 0.18857619166374207, |
| "learning_rate": 4.6208928605808895e-05, |
| "loss": 0.0229, |
| "step": 10960 |
| }, |
| { |
| "epoch": 7.5759668508287294, |
| "grad_norm": 0.16706739366054535, |
| "learning_rate": 4.612649831769519e-05, |
| "loss": 0.0256, |
| "step": 10970 |
| }, |
| { |
| "epoch": 7.582872928176796, |
| "grad_norm": 0.23252038657665253, |
| "learning_rate": 4.604407861958715e-05, |
| "loss": 0.0245, |
| "step": 10980 |
| }, |
| { |
| "epoch": 7.5897790055248615, |
| "grad_norm": 0.12180530279874802, |
| "learning_rate": 4.5961669736817114e-05, |
| "loss": 0.0258, |
| "step": 10990 |
| }, |
| { |
| "epoch": 7.596685082872928, |
| "grad_norm": 0.2081458866596222, |
| "learning_rate": 4.5879271894687814e-05, |
| "loss": 0.0291, |
| "step": 11000 |
| }, |
| { |
| "epoch": 7.6035911602209945, |
| "grad_norm": 0.14544755220413208, |
| "learning_rate": 4.5796885318471826e-05, |
| "loss": 0.0234, |
| "step": 11010 |
| }, |
| { |
| "epoch": 7.610497237569061, |
| "grad_norm": 0.11017230153083801, |
| "learning_rate": 4.571451023341086e-05, |
| "loss": 0.022, |
| "step": 11020 |
| }, |
| { |
| "epoch": 7.617403314917127, |
| "grad_norm": 0.11155513674020767, |
| "learning_rate": 4.563214686471527e-05, |
| "loss": 0.0311, |
| "step": 11030 |
| }, |
| { |
| "epoch": 7.624309392265193, |
| "grad_norm": 0.16592305898666382, |
| "learning_rate": 4.5549795437563365e-05, |
| "loss": 0.0239, |
| "step": 11040 |
| }, |
| { |
| "epoch": 7.6312154696132595, |
| "grad_norm": 0.15650154650211334, |
| "learning_rate": 4.546745617710081e-05, |
| "loss": 0.0225, |
| "step": 11050 |
| }, |
| { |
| "epoch": 7.638121546961326, |
| "grad_norm": 0.13385139405727386, |
| "learning_rate": 4.5385129308440014e-05, |
| "loss": 0.0218, |
| "step": 11060 |
| }, |
| { |
| "epoch": 7.645027624309392, |
| "grad_norm": 0.1123754158616066, |
| "learning_rate": 4.530281505665944e-05, |
| "loss": 0.0237, |
| "step": 11070 |
| }, |
| { |
| "epoch": 7.651933701657459, |
| "grad_norm": 0.13245031237602234, |
| "learning_rate": 4.5220513646803134e-05, |
| "loss": 0.0259, |
| "step": 11080 |
| }, |
| { |
| "epoch": 7.6588397790055245, |
| "grad_norm": 0.15437833964824677, |
| "learning_rate": 4.513822530388003e-05, |
| "loss": 0.0244, |
| "step": 11090 |
| }, |
| { |
| "epoch": 7.665745856353591, |
| "grad_norm": 0.15100827813148499, |
| "learning_rate": 4.5055950252863296e-05, |
| "loss": 0.0242, |
| "step": 11100 |
| }, |
| { |
| "epoch": 7.672651933701657, |
| "grad_norm": 0.13512608408927917, |
| "learning_rate": 4.4973688718689803e-05, |
| "loss": 0.0262, |
| "step": 11110 |
| }, |
| { |
| "epoch": 7.679558011049724, |
| "grad_norm": 0.17417916655540466, |
| "learning_rate": 4.4891440926259406e-05, |
| "loss": 0.0243, |
| "step": 11120 |
| }, |
| { |
| "epoch": 7.68646408839779, |
| "grad_norm": 0.14423848688602448, |
| "learning_rate": 4.480920710043443e-05, |
| "loss": 0.0264, |
| "step": 11130 |
| }, |
| { |
| "epoch": 7.693370165745856, |
| "grad_norm": 0.16867166757583618, |
| "learning_rate": 4.4726987466039044e-05, |
| "loss": 0.0282, |
| "step": 11140 |
| }, |
| { |
| "epoch": 7.7002762430939224, |
| "grad_norm": 0.19301173090934753, |
| "learning_rate": 4.46447822478586e-05, |
| "loss": 0.0311, |
| "step": 11150 |
| }, |
| { |
| "epoch": 7.707182320441989, |
| "grad_norm": 0.14649568498134613, |
| "learning_rate": 4.4562591670638974e-05, |
| "loss": 0.0242, |
| "step": 11160 |
| }, |
| { |
| "epoch": 7.714088397790055, |
| "grad_norm": 0.12760896980762482, |
| "learning_rate": 4.4480415959086105e-05, |
| "loss": 0.0241, |
| "step": 11170 |
| }, |
| { |
| "epoch": 7.720994475138122, |
| "grad_norm": 0.09078028798103333, |
| "learning_rate": 4.439825533786522e-05, |
| "loss": 0.0258, |
| "step": 11180 |
| }, |
| { |
| "epoch": 7.7279005524861875, |
| "grad_norm": 0.18043041229248047, |
| "learning_rate": 4.431611003160035e-05, |
| "loss": 0.0231, |
| "step": 11190 |
| }, |
| { |
| "epoch": 7.734806629834254, |
| "grad_norm": 0.1317226141691208, |
| "learning_rate": 4.4233980264873636e-05, |
| "loss": 0.0272, |
| "step": 11200 |
| }, |
| { |
| "epoch": 7.74171270718232, |
| "grad_norm": 0.09718924015760422, |
| "learning_rate": 4.4151866262224684e-05, |
| "loss": 0.0272, |
| "step": 11210 |
| }, |
| { |
| "epoch": 7.748618784530387, |
| "grad_norm": 0.13303658366203308, |
| "learning_rate": 4.406976824815006e-05, |
| "loss": 0.0232, |
| "step": 11220 |
| }, |
| { |
| "epoch": 7.755524861878453, |
| "grad_norm": 0.13181060552597046, |
| "learning_rate": 4.3987686447102595e-05, |
| "loss": 0.0238, |
| "step": 11230 |
| }, |
| { |
| "epoch": 7.762430939226519, |
| "grad_norm": 0.14495405554771423, |
| "learning_rate": 4.3905621083490804e-05, |
| "loss": 0.0266, |
| "step": 11240 |
| }, |
| { |
| "epoch": 7.769337016574585, |
| "grad_norm": 0.16467227041721344, |
| "learning_rate": 4.3823572381678286e-05, |
| "loss": 0.025, |
| "step": 11250 |
| }, |
| { |
| "epoch": 7.776243093922652, |
| "grad_norm": 0.16528834402561188, |
| "learning_rate": 4.374154056598301e-05, |
| "loss": 0.0235, |
| "step": 11260 |
| }, |
| { |
| "epoch": 7.783149171270718, |
| "grad_norm": 0.14273691177368164, |
| "learning_rate": 4.3659525860676845e-05, |
| "loss": 0.0218, |
| "step": 11270 |
| }, |
| { |
| "epoch": 7.790055248618785, |
| "grad_norm": 0.11080687493085861, |
| "learning_rate": 4.3577528489984854e-05, |
| "loss": 0.0231, |
| "step": 11280 |
| }, |
| { |
| "epoch": 7.796961325966851, |
| "grad_norm": 0.15187494456768036, |
| "learning_rate": 4.349554867808476e-05, |
| "loss": 0.0244, |
| "step": 11290 |
| }, |
| { |
| "epoch": 7.803867403314917, |
| "grad_norm": 0.139710932970047, |
| "learning_rate": 4.34135866491062e-05, |
| "loss": 0.028, |
| "step": 11300 |
| }, |
| { |
| "epoch": 7.810773480662983, |
| "grad_norm": 0.16394203901290894, |
| "learning_rate": 4.333164262713022e-05, |
| "loss": 0.0237, |
| "step": 11310 |
| }, |
| { |
| "epoch": 7.81767955801105, |
| "grad_norm": 0.12090770900249481, |
| "learning_rate": 4.324971683618868e-05, |
| "loss": 0.0224, |
| "step": 11320 |
| }, |
| { |
| "epoch": 7.824585635359116, |
| "grad_norm": 0.11589226126670837, |
| "learning_rate": 4.316780950026354e-05, |
| "loss": 0.0259, |
| "step": 11330 |
| }, |
| { |
| "epoch": 7.831491712707182, |
| "grad_norm": 0.16017724573612213, |
| "learning_rate": 4.308592084328637e-05, |
| "loss": 0.0226, |
| "step": 11340 |
| }, |
| { |
| "epoch": 7.838397790055248, |
| "grad_norm": 0.14901770651340485, |
| "learning_rate": 4.3004051089137576e-05, |
| "loss": 0.0266, |
| "step": 11350 |
| }, |
| { |
| "epoch": 7.845303867403315, |
| "grad_norm": 0.1358511745929718, |
| "learning_rate": 4.292220046164597e-05, |
| "loss": 0.0197, |
| "step": 11360 |
| }, |
| { |
| "epoch": 7.852209944751381, |
| "grad_norm": 0.1269238144159317, |
| "learning_rate": 4.2840369184588035e-05, |
| "loss": 0.0255, |
| "step": 11370 |
| }, |
| { |
| "epoch": 7.859116022099448, |
| "grad_norm": 0.16940242052078247, |
| "learning_rate": 4.2758557481687345e-05, |
| "loss": 0.022, |
| "step": 11380 |
| }, |
| { |
| "epoch": 7.866022099447514, |
| "grad_norm": 0.1460244357585907, |
| "learning_rate": 4.267676557661403e-05, |
| "loss": 0.0194, |
| "step": 11390 |
| }, |
| { |
| "epoch": 7.87292817679558, |
| "grad_norm": 0.13890156149864197, |
| "learning_rate": 4.2594993692983955e-05, |
| "loss": 0.0199, |
| "step": 11400 |
| }, |
| { |
| "epoch": 7.879834254143646, |
| "grad_norm": 0.14905396103858948, |
| "learning_rate": 4.251324205435837e-05, |
| "loss": 0.0225, |
| "step": 11410 |
| }, |
| { |
| "epoch": 7.886740331491713, |
| "grad_norm": 0.12149365246295929, |
| "learning_rate": 4.243151088424312e-05, |
| "loss": 0.0243, |
| "step": 11420 |
| }, |
| { |
| "epoch": 7.893646408839779, |
| "grad_norm": 0.11986390501260757, |
| "learning_rate": 4.234980040608813e-05, |
| "loss": 0.023, |
| "step": 11430 |
| }, |
| { |
| "epoch": 7.900552486187845, |
| "grad_norm": 0.08489210903644562, |
| "learning_rate": 4.22681108432867e-05, |
| "loss": 0.0201, |
| "step": 11440 |
| }, |
| { |
| "epoch": 7.907458563535911, |
| "grad_norm": 0.15432953834533691, |
| "learning_rate": 4.2186442419174984e-05, |
| "loss": 0.0275, |
| "step": 11450 |
| }, |
| { |
| "epoch": 7.914364640883978, |
| "grad_norm": 0.1917029619216919, |
| "learning_rate": 4.210479535703133e-05, |
| "loss": 0.0274, |
| "step": 11460 |
| }, |
| { |
| "epoch": 7.921270718232044, |
| "grad_norm": 0.15083684027194977, |
| "learning_rate": 4.202316988007567e-05, |
| "loss": 0.0278, |
| "step": 11470 |
| }, |
| { |
| "epoch": 7.928176795580111, |
| "grad_norm": 0.14471709728240967, |
| "learning_rate": 4.194156621146901e-05, |
| "loss": 0.0239, |
| "step": 11480 |
| }, |
| { |
| "epoch": 7.935082872928177, |
| "grad_norm": 0.18719494342803955, |
| "learning_rate": 4.1859984574312596e-05, |
| "loss": 0.0208, |
| "step": 11490 |
| }, |
| { |
| "epoch": 7.941988950276243, |
| "grad_norm": 0.09603523463010788, |
| "learning_rate": 4.177842519164752e-05, |
| "loss": 0.0254, |
| "step": 11500 |
| }, |
| { |
| "epoch": 7.948895027624309, |
| "grad_norm": 0.18613314628601074, |
| "learning_rate": 4.169688828645404e-05, |
| "loss": 0.0214, |
| "step": 11510 |
| }, |
| { |
| "epoch": 7.955801104972376, |
| "grad_norm": 0.1553083211183548, |
| "learning_rate": 4.161537408165092e-05, |
| "loss": 0.0256, |
| "step": 11520 |
| }, |
| { |
| "epoch": 7.962707182320442, |
| "grad_norm": 0.19505159556865692, |
| "learning_rate": 4.1533882800094924e-05, |
| "loss": 0.0243, |
| "step": 11530 |
| }, |
| { |
| "epoch": 7.969613259668508, |
| "grad_norm": 0.21026429533958435, |
| "learning_rate": 4.145241466458005e-05, |
| "loss": 0.0217, |
| "step": 11540 |
| }, |
| { |
| "epoch": 7.976519337016574, |
| "grad_norm": 0.13795223832130432, |
| "learning_rate": 4.13709698978371e-05, |
| "loss": 0.0227, |
| "step": 11550 |
| }, |
| { |
| "epoch": 7.983425414364641, |
| "grad_norm": 0.16697093844413757, |
| "learning_rate": 4.1289548722532944e-05, |
| "loss": 0.0228, |
| "step": 11560 |
| }, |
| { |
| "epoch": 7.990331491712707, |
| "grad_norm": 0.18963934481143951, |
| "learning_rate": 4.120815136126999e-05, |
| "loss": 0.0238, |
| "step": 11570 |
| }, |
| { |
| "epoch": 7.997237569060774, |
| "grad_norm": 0.13078151643276215, |
| "learning_rate": 4.112677803658548e-05, |
| "loss": 0.0196, |
| "step": 11580 |
| }, |
| { |
| "epoch": 8.00414364640884, |
| "grad_norm": 0.12813352048397064, |
| "learning_rate": 4.1045428970951e-05, |
| "loss": 0.0221, |
| "step": 11590 |
| }, |
| { |
| "epoch": 8.011049723756907, |
| "grad_norm": 0.125010147690773, |
| "learning_rate": 4.0964104386771785e-05, |
| "loss": 0.0223, |
| "step": 11600 |
| }, |
| { |
| "epoch": 8.017955801104973, |
| "grad_norm": 0.10712146013975143, |
| "learning_rate": 4.0882804506386144e-05, |
| "loss": 0.0218, |
| "step": 11610 |
| }, |
| { |
| "epoch": 8.024861878453038, |
| "grad_norm": 0.17532770335674286, |
| "learning_rate": 4.080152955206485e-05, |
| "loss": 0.0243, |
| "step": 11620 |
| }, |
| { |
| "epoch": 8.031767955801104, |
| "grad_norm": 0.14849954843521118, |
| "learning_rate": 4.0720279746010505e-05, |
| "loss": 0.0225, |
| "step": 11630 |
| }, |
| { |
| "epoch": 8.03867403314917, |
| "grad_norm": 0.1307544708251953, |
| "learning_rate": 4.063905531035699e-05, |
| "loss": 0.0251, |
| "step": 11640 |
| }, |
| { |
| "epoch": 8.045580110497237, |
| "grad_norm": 0.11142382770776749, |
| "learning_rate": 4.055785646716882e-05, |
| "loss": 0.0226, |
| "step": 11650 |
| }, |
| { |
| "epoch": 8.052486187845304, |
| "grad_norm": 0.09290549904108047, |
| "learning_rate": 4.047668343844051e-05, |
| "loss": 0.0216, |
| "step": 11660 |
| }, |
| { |
| "epoch": 8.05939226519337, |
| "grad_norm": 0.14706315100193024, |
| "learning_rate": 4.039553644609604e-05, |
| "loss": 0.0217, |
| "step": 11670 |
| }, |
| { |
| "epoch": 8.066298342541437, |
| "grad_norm": 0.16171616315841675, |
| "learning_rate": 4.0314415711988176e-05, |
| "loss": 0.0213, |
| "step": 11680 |
| }, |
| { |
| "epoch": 8.073204419889503, |
| "grad_norm": 0.09882531315088272, |
| "learning_rate": 4.023332145789792e-05, |
| "loss": 0.0197, |
| "step": 11690 |
| }, |
| { |
| "epoch": 8.08011049723757, |
| "grad_norm": 0.13337063789367676, |
| "learning_rate": 4.015225390553385e-05, |
| "loss": 0.0249, |
| "step": 11700 |
| }, |
| { |
| "epoch": 8.087016574585636, |
| "grad_norm": 0.1553422063589096, |
| "learning_rate": 4.007121327653158e-05, |
| "loss": 0.0294, |
| "step": 11710 |
| }, |
| { |
| "epoch": 8.0939226519337, |
| "grad_norm": 0.1073046326637268, |
| "learning_rate": 3.9990199792453064e-05, |
| "loss": 0.0198, |
| "step": 11720 |
| }, |
| { |
| "epoch": 8.100828729281767, |
| "grad_norm": 0.11477019637823105, |
| "learning_rate": 3.9909213674786103e-05, |
| "loss": 0.0207, |
| "step": 11730 |
| }, |
| { |
| "epoch": 8.107734806629834, |
| "grad_norm": 0.10524137318134308, |
| "learning_rate": 3.982825514494363e-05, |
| "loss": 0.023, |
| "step": 11740 |
| }, |
| { |
| "epoch": 8.1146408839779, |
| "grad_norm": 0.15390941500663757, |
| "learning_rate": 3.974732442426319e-05, |
| "loss": 0.0225, |
| "step": 11750 |
| }, |
| { |
| "epoch": 8.121546961325967, |
| "grad_norm": 0.09843657910823822, |
| "learning_rate": 3.966642173400629e-05, |
| "loss": 0.0201, |
| "step": 11760 |
| }, |
| { |
| "epoch": 8.128453038674033, |
| "grad_norm": 0.17840729653835297, |
| "learning_rate": 3.9585547295357764e-05, |
| "loss": 0.0191, |
| "step": 11770 |
| }, |
| { |
| "epoch": 8.1353591160221, |
| "grad_norm": 0.11976867914199829, |
| "learning_rate": 3.950470132942526e-05, |
| "loss": 0.0208, |
| "step": 11780 |
| }, |
| { |
| "epoch": 8.142265193370166, |
| "grad_norm": 0.16819389164447784, |
| "learning_rate": 3.942388405723856e-05, |
| "loss": 0.0212, |
| "step": 11790 |
| }, |
| { |
| "epoch": 8.149171270718233, |
| "grad_norm": 0.13980218768119812, |
| "learning_rate": 3.9343095699749e-05, |
| "loss": 0.0252, |
| "step": 11800 |
| }, |
| { |
| "epoch": 8.1560773480663, |
| "grad_norm": 0.15626643598079681, |
| "learning_rate": 3.9262336477828874e-05, |
| "loss": 0.0205, |
| "step": 11810 |
| }, |
| { |
| "epoch": 8.162983425414364, |
| "grad_norm": 0.2204706221818924, |
| "learning_rate": 3.9181606612270794e-05, |
| "loss": 0.0256, |
| "step": 11820 |
| }, |
| { |
| "epoch": 8.16988950276243, |
| "grad_norm": 0.19606149196624756, |
| "learning_rate": 3.910090632378713e-05, |
| "loss": 0.0247, |
| "step": 11830 |
| }, |
| { |
| "epoch": 8.176795580110497, |
| "grad_norm": 0.1758870631456375, |
| "learning_rate": 3.90202358330094e-05, |
| "loss": 0.0199, |
| "step": 11840 |
| }, |
| { |
| "epoch": 8.183701657458563, |
| "grad_norm": 0.07964209467172623, |
| "learning_rate": 3.8939595360487656e-05, |
| "loss": 0.0221, |
| "step": 11850 |
| }, |
| { |
| "epoch": 8.19060773480663, |
| "grad_norm": 0.14514313638210297, |
| "learning_rate": 3.885898512668984e-05, |
| "loss": 0.0255, |
| "step": 11860 |
| }, |
| { |
| "epoch": 8.197513812154696, |
| "grad_norm": 0.1320313960313797, |
| "learning_rate": 3.877840535200127e-05, |
| "loss": 0.0274, |
| "step": 11870 |
| }, |
| { |
| "epoch": 8.204419889502763, |
| "grad_norm": 0.1110544502735138, |
| "learning_rate": 3.869785625672397e-05, |
| "loss": 0.019, |
| "step": 11880 |
| }, |
| { |
| "epoch": 8.21132596685083, |
| "grad_norm": 0.11055376380681992, |
| "learning_rate": 3.8617338061076094e-05, |
| "loss": 0.0198, |
| "step": 11890 |
| }, |
| { |
| "epoch": 8.218232044198896, |
| "grad_norm": 0.15744233131408691, |
| "learning_rate": 3.853685098519132e-05, |
| "loss": 0.0253, |
| "step": 11900 |
| }, |
| { |
| "epoch": 8.225138121546962, |
| "grad_norm": 0.14390379190444946, |
| "learning_rate": 3.845639524911823e-05, |
| "loss": 0.0264, |
| "step": 11910 |
| }, |
| { |
| "epoch": 8.232044198895027, |
| "grad_norm": 0.09943123161792755, |
| "learning_rate": 3.837597107281974e-05, |
| "loss": 0.0178, |
| "step": 11920 |
| }, |
| { |
| "epoch": 8.238950276243093, |
| "grad_norm": 0.15828579664230347, |
| "learning_rate": 3.829557867617247e-05, |
| "loss": 0.0191, |
| "step": 11930 |
| }, |
| { |
| "epoch": 8.24585635359116, |
| "grad_norm": 0.12498103082180023, |
| "learning_rate": 3.821521827896618e-05, |
| "loss": 0.0232, |
| "step": 11940 |
| }, |
| { |
| "epoch": 8.252762430939226, |
| "grad_norm": 0.12863773107528687, |
| "learning_rate": 3.81348901009031e-05, |
| "loss": 0.0221, |
| "step": 11950 |
| }, |
| { |
| "epoch": 8.259668508287293, |
| "grad_norm": 0.15252485871315002, |
| "learning_rate": 3.805459436159741e-05, |
| "loss": 0.0219, |
| "step": 11960 |
| }, |
| { |
| "epoch": 8.26657458563536, |
| "grad_norm": 0.09118683636188507, |
| "learning_rate": 3.797433128057461e-05, |
| "loss": 0.025, |
| "step": 11970 |
| }, |
| { |
| "epoch": 8.273480662983426, |
| "grad_norm": 0.17367224395275116, |
| "learning_rate": 3.789410107727089e-05, |
| "loss": 0.0216, |
| "step": 11980 |
| }, |
| { |
| "epoch": 8.280386740331492, |
| "grad_norm": 0.10616571456193924, |
| "learning_rate": 3.781390397103257e-05, |
| "loss": 0.0209, |
| "step": 11990 |
| }, |
| { |
| "epoch": 8.287292817679559, |
| "grad_norm": 0.19001731276512146, |
| "learning_rate": 3.7733740181115455e-05, |
| "loss": 0.0228, |
| "step": 12000 |
| }, |
| { |
| "epoch": 8.294198895027625, |
| "grad_norm": 0.16032475233078003, |
| "learning_rate": 3.7653609926684306e-05, |
| "loss": 0.0218, |
| "step": 12010 |
| }, |
| { |
| "epoch": 8.30110497237569, |
| "grad_norm": 0.12353052943944931, |
| "learning_rate": 3.757351342681217e-05, |
| "loss": 0.0237, |
| "step": 12020 |
| }, |
| { |
| "epoch": 8.308011049723756, |
| "grad_norm": 0.15098872780799866, |
| "learning_rate": 3.749345090047982e-05, |
| "loss": 0.0244, |
| "step": 12030 |
| }, |
| { |
| "epoch": 8.314917127071823, |
| "grad_norm": 0.13546410202980042, |
| "learning_rate": 3.741342256657515e-05, |
| "loss": 0.0206, |
| "step": 12040 |
| }, |
| { |
| "epoch": 8.32182320441989, |
| "grad_norm": 0.13305281102657318, |
| "learning_rate": 3.7333428643892567e-05, |
| "loss": 0.0251, |
| "step": 12050 |
| }, |
| { |
| "epoch": 8.328729281767956, |
| "grad_norm": 0.11955028027296066, |
| "learning_rate": 3.725346935113239e-05, |
| "loss": 0.0247, |
| "step": 12060 |
| }, |
| { |
| "epoch": 8.335635359116022, |
| "grad_norm": 0.16611330211162567, |
| "learning_rate": 3.717354490690029e-05, |
| "loss": 0.0263, |
| "step": 12070 |
| }, |
| { |
| "epoch": 8.342541436464089, |
| "grad_norm": 0.16200731694698334, |
| "learning_rate": 3.709365552970664e-05, |
| "loss": 0.0217, |
| "step": 12080 |
| }, |
| { |
| "epoch": 8.349447513812155, |
| "grad_norm": 0.12580612301826477, |
| "learning_rate": 3.7013801437965945e-05, |
| "loss": 0.0222, |
| "step": 12090 |
| }, |
| { |
| "epoch": 8.356353591160222, |
| "grad_norm": 0.15770767629146576, |
| "learning_rate": 3.693398284999623e-05, |
| "loss": 0.0224, |
| "step": 12100 |
| }, |
| { |
| "epoch": 8.363259668508288, |
| "grad_norm": 0.15847472846508026, |
| "learning_rate": 3.6854199984018484e-05, |
| "loss": 0.0219, |
| "step": 12110 |
| }, |
| { |
| "epoch": 8.370165745856353, |
| "grad_norm": 0.16961674392223358, |
| "learning_rate": 3.677445305815601e-05, |
| "loss": 0.0234, |
| "step": 12120 |
| }, |
| { |
| "epoch": 8.37707182320442, |
| "grad_norm": 0.14952635765075684, |
| "learning_rate": 3.669474229043387e-05, |
| "loss": 0.0213, |
| "step": 12130 |
| }, |
| { |
| "epoch": 8.383977900552486, |
| "grad_norm": 0.1705881506204605, |
| "learning_rate": 3.6615067898778235e-05, |
| "loss": 0.0221, |
| "step": 12140 |
| }, |
| { |
| "epoch": 8.390883977900552, |
| "grad_norm": 0.09255818277597427, |
| "learning_rate": 3.6535430101015866e-05, |
| "loss": 0.0235, |
| "step": 12150 |
| }, |
| { |
| "epoch": 8.397790055248619, |
| "grad_norm": 0.12237586826086044, |
| "learning_rate": 3.645582911487345e-05, |
| "loss": 0.0297, |
| "step": 12160 |
| }, |
| { |
| "epoch": 8.404696132596685, |
| "grad_norm": 0.1676071137189865, |
| "learning_rate": 3.637626515797706e-05, |
| "loss": 0.0222, |
| "step": 12170 |
| }, |
| { |
| "epoch": 8.411602209944752, |
| "grad_norm": 0.16139745712280273, |
| "learning_rate": 3.629673844785152e-05, |
| "loss": 0.0226, |
| "step": 12180 |
| }, |
| { |
| "epoch": 8.418508287292818, |
| "grad_norm": 0.12163612991571426, |
| "learning_rate": 3.621724920191979e-05, |
| "loss": 0.0242, |
| "step": 12190 |
| }, |
| { |
| "epoch": 8.425414364640885, |
| "grad_norm": 0.13052451610565186, |
| "learning_rate": 3.6137797637502444e-05, |
| "loss": 0.0196, |
| "step": 12200 |
| }, |
| { |
| "epoch": 8.432320441988951, |
| "grad_norm": 0.16374514997005463, |
| "learning_rate": 3.6058383971817035e-05, |
| "loss": 0.0235, |
| "step": 12210 |
| }, |
| { |
| "epoch": 8.439226519337016, |
| "grad_norm": 0.14451929926872253, |
| "learning_rate": 3.59790084219775e-05, |
| "loss": 0.022, |
| "step": 12220 |
| }, |
| { |
| "epoch": 8.446132596685082, |
| "grad_norm": 0.16537247598171234, |
| "learning_rate": 3.589967120499353e-05, |
| "loss": 0.0278, |
| "step": 12230 |
| }, |
| { |
| "epoch": 8.453038674033149, |
| "grad_norm": 0.17903123795986176, |
| "learning_rate": 3.5820372537770075e-05, |
| "loss": 0.0212, |
| "step": 12240 |
| }, |
| { |
| "epoch": 8.459944751381215, |
| "grad_norm": 0.1467219591140747, |
| "learning_rate": 3.5741112637106655e-05, |
| "loss": 0.0228, |
| "step": 12250 |
| }, |
| { |
| "epoch": 8.466850828729282, |
| "grad_norm": 0.14892370998859406, |
| "learning_rate": 3.5661891719696804e-05, |
| "loss": 0.0264, |
| "step": 12260 |
| }, |
| { |
| "epoch": 8.473756906077348, |
| "grad_norm": 0.10842717438936234, |
| "learning_rate": 3.5582710002127504e-05, |
| "loss": 0.0236, |
| "step": 12270 |
| }, |
| { |
| "epoch": 8.480662983425415, |
| "grad_norm": 0.12782053649425507, |
| "learning_rate": 3.550356770087853e-05, |
| "loss": 0.0221, |
| "step": 12280 |
| }, |
| { |
| "epoch": 8.487569060773481, |
| "grad_norm": 0.15519659221172333, |
| "learning_rate": 3.5424465032321914e-05, |
| "loss": 0.0235, |
| "step": 12290 |
| }, |
| { |
| "epoch": 8.494475138121548, |
| "grad_norm": 0.16490302979946136, |
| "learning_rate": 3.5345402212721335e-05, |
| "loss": 0.0188, |
| "step": 12300 |
| }, |
| { |
| "epoch": 8.501381215469614, |
| "grad_norm": 0.150857076048851, |
| "learning_rate": 3.526637945823152e-05, |
| "loss": 0.0189, |
| "step": 12310 |
| }, |
| { |
| "epoch": 8.50828729281768, |
| "grad_norm": 0.12046564370393753, |
| "learning_rate": 3.518739698489767e-05, |
| "loss": 0.0211, |
| "step": 12320 |
| }, |
| { |
| "epoch": 8.515193370165745, |
| "grad_norm": 0.16205374896526337, |
| "learning_rate": 3.510845500865485e-05, |
| "loss": 0.0225, |
| "step": 12330 |
| }, |
| { |
| "epoch": 8.522099447513812, |
| "grad_norm": 0.15190370380878448, |
| "learning_rate": 3.502955374532739e-05, |
| "loss": 0.0226, |
| "step": 12340 |
| }, |
| { |
| "epoch": 8.529005524861878, |
| "grad_norm": 0.16450901329517365, |
| "learning_rate": 3.495069341062836e-05, |
| "loss": 0.0206, |
| "step": 12350 |
| }, |
| { |
| "epoch": 8.535911602209945, |
| "grad_norm": 0.11976935714483261, |
| "learning_rate": 3.4871874220158896e-05, |
| "loss": 0.0214, |
| "step": 12360 |
| }, |
| { |
| "epoch": 8.542817679558011, |
| "grad_norm": 0.13164155185222626, |
| "learning_rate": 3.479309638940762e-05, |
| "loss": 0.0197, |
| "step": 12370 |
| }, |
| { |
| "epoch": 8.549723756906078, |
| "grad_norm": 0.13826210796833038, |
| "learning_rate": 3.4714360133750146e-05, |
| "loss": 0.025, |
| "step": 12380 |
| }, |
| { |
| "epoch": 8.556629834254144, |
| "grad_norm": 0.16191700100898743, |
| "learning_rate": 3.463566566844839e-05, |
| "loss": 0.0222, |
| "step": 12390 |
| }, |
| { |
| "epoch": 8.56353591160221, |
| "grad_norm": 0.14451029896736145, |
| "learning_rate": 3.4557013208650016e-05, |
| "loss": 0.0219, |
| "step": 12400 |
| }, |
| { |
| "epoch": 8.570441988950277, |
| "grad_norm": 0.1787310689687729, |
| "learning_rate": 3.4478402969387857e-05, |
| "loss": 0.024, |
| "step": 12410 |
| }, |
| { |
| "epoch": 8.577348066298342, |
| "grad_norm": 0.15901915729045868, |
| "learning_rate": 3.4399835165579266e-05, |
| "loss": 0.0233, |
| "step": 12420 |
| }, |
| { |
| "epoch": 8.584254143646408, |
| "grad_norm": 0.1560279279947281, |
| "learning_rate": 3.4321310012025645e-05, |
| "loss": 0.0188, |
| "step": 12430 |
| }, |
| { |
| "epoch": 8.591160220994475, |
| "grad_norm": 0.15497836470603943, |
| "learning_rate": 3.424282772341176e-05, |
| "loss": 0.0238, |
| "step": 12440 |
| }, |
| { |
| "epoch": 8.598066298342541, |
| "grad_norm": 0.13106022775173187, |
| "learning_rate": 3.416438851430519e-05, |
| "loss": 0.019, |
| "step": 12450 |
| }, |
| { |
| "epoch": 8.604972375690608, |
| "grad_norm": 0.13551107048988342, |
| "learning_rate": 3.408599259915577e-05, |
| "loss": 0.0207, |
| "step": 12460 |
| }, |
| { |
| "epoch": 8.611878453038674, |
| "grad_norm": 0.15204903483390808, |
| "learning_rate": 3.400764019229487e-05, |
| "loss": 0.0216, |
| "step": 12470 |
| }, |
| { |
| "epoch": 8.61878453038674, |
| "grad_norm": 0.16227421164512634, |
| "learning_rate": 3.3929331507935035e-05, |
| "loss": 0.0237, |
| "step": 12480 |
| }, |
| { |
| "epoch": 8.625690607734807, |
| "grad_norm": 0.1513846218585968, |
| "learning_rate": 3.3851066760169196e-05, |
| "loss": 0.0221, |
| "step": 12490 |
| }, |
| { |
| "epoch": 8.632596685082873, |
| "grad_norm": 0.16585606336593628, |
| "learning_rate": 3.377284616297021e-05, |
| "loss": 0.0205, |
| "step": 12500 |
| }, |
| { |
| "epoch": 8.63950276243094, |
| "grad_norm": 0.1781190186738968, |
| "learning_rate": 3.3694669930190166e-05, |
| "loss": 0.0196, |
| "step": 12510 |
| }, |
| { |
| "epoch": 8.646408839779006, |
| "grad_norm": 0.1248004138469696, |
| "learning_rate": 3.36165382755599e-05, |
| "loss": 0.0198, |
| "step": 12520 |
| }, |
| { |
| "epoch": 8.653314917127071, |
| "grad_norm": 0.14076818525791168, |
| "learning_rate": 3.35384514126884e-05, |
| "loss": 0.0184, |
| "step": 12530 |
| }, |
| { |
| "epoch": 8.660220994475138, |
| "grad_norm": 0.14751794934272766, |
| "learning_rate": 3.3460409555062154e-05, |
| "loss": 0.019, |
| "step": 12540 |
| }, |
| { |
| "epoch": 8.667127071823204, |
| "grad_norm": 0.17103084921836853, |
| "learning_rate": 3.3382412916044645e-05, |
| "loss": 0.0224, |
| "step": 12550 |
| }, |
| { |
| "epoch": 8.67403314917127, |
| "grad_norm": 0.1588471233844757, |
| "learning_rate": 3.330446170887566e-05, |
| "loss": 0.0216, |
| "step": 12560 |
| }, |
| { |
| "epoch": 8.680939226519337, |
| "grad_norm": 0.13995952904224396, |
| "learning_rate": 3.3226556146670834e-05, |
| "loss": 0.0202, |
| "step": 12570 |
| }, |
| { |
| "epoch": 8.687845303867404, |
| "grad_norm": 0.17354713380336761, |
| "learning_rate": 3.314869644242102e-05, |
| "loss": 0.0255, |
| "step": 12580 |
| }, |
| { |
| "epoch": 8.69475138121547, |
| "grad_norm": 0.1409205198287964, |
| "learning_rate": 3.3070882808991674e-05, |
| "loss": 0.0216, |
| "step": 12590 |
| }, |
| { |
| "epoch": 8.701657458563536, |
| "grad_norm": 0.15135961771011353, |
| "learning_rate": 3.2993115459122305e-05, |
| "loss": 0.0229, |
| "step": 12600 |
| }, |
| { |
| "epoch": 8.708563535911603, |
| "grad_norm": 0.1787615269422531, |
| "learning_rate": 3.2915394605425835e-05, |
| "loss": 0.0204, |
| "step": 12610 |
| }, |
| { |
| "epoch": 8.715469613259668, |
| "grad_norm": 0.1657034307718277, |
| "learning_rate": 3.283772046038816e-05, |
| "loss": 0.021, |
| "step": 12620 |
| }, |
| { |
| "epoch": 8.722375690607734, |
| "grad_norm": 0.14173857867717743, |
| "learning_rate": 3.276009323636739e-05, |
| "loss": 0.0189, |
| "step": 12630 |
| }, |
| { |
| "epoch": 8.7292817679558, |
| "grad_norm": 0.1563398241996765, |
| "learning_rate": 3.268251314559344e-05, |
| "loss": 0.0205, |
| "step": 12640 |
| }, |
| { |
| "epoch": 8.736187845303867, |
| "grad_norm": 0.1378328651189804, |
| "learning_rate": 3.2604980400167254e-05, |
| "loss": 0.02, |
| "step": 12650 |
| }, |
| { |
| "epoch": 8.743093922651934, |
| "grad_norm": 0.20059895515441895, |
| "learning_rate": 3.252749521206042e-05, |
| "loss": 0.0211, |
| "step": 12660 |
| }, |
| { |
| "epoch": 8.75, |
| "grad_norm": 0.11326922476291656, |
| "learning_rate": 3.2450057793114494e-05, |
| "loss": 0.0238, |
| "step": 12670 |
| }, |
| { |
| "epoch": 8.756906077348066, |
| "grad_norm": 0.17236323654651642, |
| "learning_rate": 3.2372668355040435e-05, |
| "loss": 0.0204, |
| "step": 12680 |
| }, |
| { |
| "epoch": 8.763812154696133, |
| "grad_norm": 0.10979443788528442, |
| "learning_rate": 3.2295327109418005e-05, |
| "loss": 0.0238, |
| "step": 12690 |
| }, |
| { |
| "epoch": 8.7707182320442, |
| "grad_norm": 0.11837854981422424, |
| "learning_rate": 3.221803426769518e-05, |
| "loss": 0.024, |
| "step": 12700 |
| }, |
| { |
| "epoch": 8.777624309392266, |
| "grad_norm": 0.10363747179508209, |
| "learning_rate": 3.214079004118768e-05, |
| "loss": 0.0213, |
| "step": 12710 |
| }, |
| { |
| "epoch": 8.784530386740332, |
| "grad_norm": 0.1248355507850647, |
| "learning_rate": 3.2063594641078234e-05, |
| "loss": 0.0223, |
| "step": 12720 |
| }, |
| { |
| "epoch": 8.791436464088397, |
| "grad_norm": 0.09620241075754166, |
| "learning_rate": 3.198644827841616e-05, |
| "loss": 0.0212, |
| "step": 12730 |
| }, |
| { |
| "epoch": 8.798342541436464, |
| "grad_norm": 0.16484878957271576, |
| "learning_rate": 3.1909351164116654e-05, |
| "loss": 0.0198, |
| "step": 12740 |
| }, |
| { |
| "epoch": 8.80524861878453, |
| "grad_norm": 0.15809795260429382, |
| "learning_rate": 3.183230350896026e-05, |
| "loss": 0.0216, |
| "step": 12750 |
| }, |
| { |
| "epoch": 8.812154696132596, |
| "grad_norm": 0.07511230558156967, |
| "learning_rate": 3.1755305523592337e-05, |
| "loss": 0.0226, |
| "step": 12760 |
| }, |
| { |
| "epoch": 8.819060773480663, |
| "grad_norm": 0.0902475044131279, |
| "learning_rate": 3.167835741852245e-05, |
| "loss": 0.0209, |
| "step": 12770 |
| }, |
| { |
| "epoch": 8.82596685082873, |
| "grad_norm": 0.16243767738342285, |
| "learning_rate": 3.160145940412378e-05, |
| "loss": 0.0254, |
| "step": 12780 |
| }, |
| { |
| "epoch": 8.832872928176796, |
| "grad_norm": 0.17259711027145386, |
| "learning_rate": 3.1524611690632545e-05, |
| "loss": 0.0239, |
| "step": 12790 |
| }, |
| { |
| "epoch": 8.839779005524862, |
| "grad_norm": 0.11631940305233002, |
| "learning_rate": 3.144781448814746e-05, |
| "loss": 0.022, |
| "step": 12800 |
| }, |
| { |
| "epoch": 8.846685082872929, |
| "grad_norm": 0.14422062039375305, |
| "learning_rate": 3.1371068006629145e-05, |
| "loss": 0.0266, |
| "step": 12810 |
| }, |
| { |
| "epoch": 8.853591160220994, |
| "grad_norm": 0.11181170493364334, |
| "learning_rate": 3.129437245589956e-05, |
| "loss": 0.0235, |
| "step": 12820 |
| }, |
| { |
| "epoch": 8.86049723756906, |
| "grad_norm": 0.168172687292099, |
| "learning_rate": 3.121772804564143e-05, |
| "loss": 0.0215, |
| "step": 12830 |
| }, |
| { |
| "epoch": 8.867403314917127, |
| "grad_norm": 0.14143504202365875, |
| "learning_rate": 3.11411349853976e-05, |
| "loss": 0.0245, |
| "step": 12840 |
| }, |
| { |
| "epoch": 8.874309392265193, |
| "grad_norm": 0.12448939681053162, |
| "learning_rate": 3.10645934845706e-05, |
| "loss": 0.0194, |
| "step": 12850 |
| }, |
| { |
| "epoch": 8.88121546961326, |
| "grad_norm": 0.11174539476633072, |
| "learning_rate": 3.098810375242196e-05, |
| "loss": 0.0196, |
| "step": 12860 |
| }, |
| { |
| "epoch": 8.888121546961326, |
| "grad_norm": 0.15435843169689178, |
| "learning_rate": 3.0911665998071704e-05, |
| "loss": 0.0231, |
| "step": 12870 |
| }, |
| { |
| "epoch": 8.895027624309392, |
| "grad_norm": 0.14516079425811768, |
| "learning_rate": 3.083528043049774e-05, |
| "loss": 0.021, |
| "step": 12880 |
| }, |
| { |
| "epoch": 8.901933701657459, |
| "grad_norm": 0.17633748054504395, |
| "learning_rate": 3.0758947258535255e-05, |
| "loss": 0.0216, |
| "step": 12890 |
| }, |
| { |
| "epoch": 8.908839779005525, |
| "grad_norm": 0.112397700548172, |
| "learning_rate": 3.068266669087625e-05, |
| "loss": 0.0197, |
| "step": 12900 |
| }, |
| { |
| "epoch": 8.915745856353592, |
| "grad_norm": 0.27577459812164307, |
| "learning_rate": 3.060643893606887e-05, |
| "loss": 0.0217, |
| "step": 12910 |
| }, |
| { |
| "epoch": 8.922651933701658, |
| "grad_norm": 0.15804259479045868, |
| "learning_rate": 3.053026420251693e-05, |
| "loss": 0.0202, |
| "step": 12920 |
| }, |
| { |
| "epoch": 8.929558011049723, |
| "grad_norm": 0.14614105224609375, |
| "learning_rate": 3.0454142698479183e-05, |
| "loss": 0.0241, |
| "step": 12930 |
| }, |
| { |
| "epoch": 8.93646408839779, |
| "grad_norm": 0.1321985125541687, |
| "learning_rate": 3.0378074632068954e-05, |
| "loss": 0.0193, |
| "step": 12940 |
| }, |
| { |
| "epoch": 8.943370165745856, |
| "grad_norm": 0.17028087377548218, |
| "learning_rate": 3.0302060211253408e-05, |
| "loss": 0.0222, |
| "step": 12950 |
| }, |
| { |
| "epoch": 8.950276243093922, |
| "grad_norm": 0.14149953424930573, |
| "learning_rate": 3.0226099643853073e-05, |
| "loss": 0.0193, |
| "step": 12960 |
| }, |
| { |
| "epoch": 8.957182320441989, |
| "grad_norm": 0.16676829755306244, |
| "learning_rate": 3.0150193137541283e-05, |
| "loss": 0.0244, |
| "step": 12970 |
| }, |
| { |
| "epoch": 8.964088397790055, |
| "grad_norm": 0.13386687636375427, |
| "learning_rate": 3.0074340899843467e-05, |
| "loss": 0.022, |
| "step": 12980 |
| }, |
| { |
| "epoch": 8.970994475138122, |
| "grad_norm": 0.13977794349193573, |
| "learning_rate": 2.999854313813677e-05, |
| "loss": 0.0204, |
| "step": 12990 |
| }, |
| { |
| "epoch": 8.977900552486188, |
| "grad_norm": 0.14581474661827087, |
| "learning_rate": 2.9922800059649382e-05, |
| "loss": 0.0211, |
| "step": 13000 |
| }, |
| { |
| "epoch": 8.984806629834255, |
| "grad_norm": 0.15299993753433228, |
| "learning_rate": 2.9847111871459976e-05, |
| "loss": 0.0245, |
| "step": 13010 |
| }, |
| { |
| "epoch": 8.99171270718232, |
| "grad_norm": 0.13112711906433105, |
| "learning_rate": 2.977147878049721e-05, |
| "loss": 0.0226, |
| "step": 13020 |
| }, |
| { |
| "epoch": 8.998618784530386, |
| "grad_norm": 0.16714787483215332, |
| "learning_rate": 2.9695900993539006e-05, |
| "loss": 0.0233, |
| "step": 13030 |
| }, |
| { |
| "epoch": 9.005524861878452, |
| "grad_norm": 0.1377313733100891, |
| "learning_rate": 2.9620378717212183e-05, |
| "loss": 0.0226, |
| "step": 13040 |
| }, |
| { |
| "epoch": 9.012430939226519, |
| "grad_norm": 0.15656685829162598, |
| "learning_rate": 2.9544912157991745e-05, |
| "loss": 0.0218, |
| "step": 13050 |
| }, |
| { |
| "epoch": 9.019337016574585, |
| "grad_norm": 0.2332046627998352, |
| "learning_rate": 2.9469501522200405e-05, |
| "loss": 0.0247, |
| "step": 13060 |
| }, |
| { |
| "epoch": 9.026243093922652, |
| "grad_norm": 0.14399616420269012, |
| "learning_rate": 2.9394147016007946e-05, |
| "loss": 0.0177, |
| "step": 13070 |
| }, |
| { |
| "epoch": 9.033149171270718, |
| "grad_norm": 0.12218581885099411, |
| "learning_rate": 2.9318848845430702e-05, |
| "loss": 0.0208, |
| "step": 13080 |
| }, |
| { |
| "epoch": 9.040055248618785, |
| "grad_norm": 0.1258987933397293, |
| "learning_rate": 2.9243607216331013e-05, |
| "loss": 0.0229, |
| "step": 13090 |
| }, |
| { |
| "epoch": 9.046961325966851, |
| "grad_norm": 0.11139830201864243, |
| "learning_rate": 2.916842233441661e-05, |
| "loss": 0.0196, |
| "step": 13100 |
| }, |
| { |
| "epoch": 9.053867403314918, |
| "grad_norm": 0.18543629348278046, |
| "learning_rate": 2.90932944052401e-05, |
| "loss": 0.0215, |
| "step": 13110 |
| }, |
| { |
| "epoch": 9.060773480662984, |
| "grad_norm": 0.17765985429286957, |
| "learning_rate": 2.9018223634198354e-05, |
| "loss": 0.0203, |
| "step": 13120 |
| }, |
| { |
| "epoch": 9.067679558011049, |
| "grad_norm": 0.1288689821958542, |
| "learning_rate": 2.8943210226532025e-05, |
| "loss": 0.0256, |
| "step": 13130 |
| }, |
| { |
| "epoch": 9.074585635359115, |
| "grad_norm": 0.12179256230592728, |
| "learning_rate": 2.8868254387324857e-05, |
| "loss": 0.0204, |
| "step": 13140 |
| }, |
| { |
| "epoch": 9.081491712707182, |
| "grad_norm": 0.1416948288679123, |
| "learning_rate": 2.8793356321503306e-05, |
| "loss": 0.0211, |
| "step": 13150 |
| }, |
| { |
| "epoch": 9.088397790055248, |
| "grad_norm": 0.1624247431755066, |
| "learning_rate": 2.87185162338358e-05, |
| "loss": 0.0184, |
| "step": 13160 |
| }, |
| { |
| "epoch": 9.095303867403315, |
| "grad_norm": 0.19820043444633484, |
| "learning_rate": 2.8643734328932253e-05, |
| "loss": 0.021, |
| "step": 13170 |
| }, |
| { |
| "epoch": 9.102209944751381, |
| "grad_norm": 0.1763990819454193, |
| "learning_rate": 2.856901081124359e-05, |
| "loss": 0.0204, |
| "step": 13180 |
| }, |
| { |
| "epoch": 9.109116022099448, |
| "grad_norm": 0.09194623678922653, |
| "learning_rate": 2.8494345885061002e-05, |
| "loss": 0.019, |
| "step": 13190 |
| }, |
| { |
| "epoch": 9.116022099447514, |
| "grad_norm": 0.12375927716493607, |
| "learning_rate": 2.8419739754515616e-05, |
| "loss": 0.025, |
| "step": 13200 |
| }, |
| { |
| "epoch": 9.12292817679558, |
| "grad_norm": 0.18553689122200012, |
| "learning_rate": 2.8345192623577666e-05, |
| "loss": 0.0233, |
| "step": 13210 |
| }, |
| { |
| "epoch": 9.129834254143647, |
| "grad_norm": 0.14989089965820312, |
| "learning_rate": 2.8270704696056193e-05, |
| "loss": 0.0212, |
| "step": 13220 |
| }, |
| { |
| "epoch": 9.136740331491712, |
| "grad_norm": 0.13519397377967834, |
| "learning_rate": 2.8196276175598367e-05, |
| "loss": 0.0234, |
| "step": 13230 |
| }, |
| { |
| "epoch": 9.143646408839778, |
| "grad_norm": 0.09733614325523376, |
| "learning_rate": 2.8121907265688884e-05, |
| "loss": 0.0271, |
| "step": 13240 |
| }, |
| { |
| "epoch": 9.150552486187845, |
| "grad_norm": 0.12137151509523392, |
| "learning_rate": 2.804759816964957e-05, |
| "loss": 0.0199, |
| "step": 13250 |
| }, |
| { |
| "epoch": 9.157458563535911, |
| "grad_norm": 0.1408531367778778, |
| "learning_rate": 2.797334909063857e-05, |
| "loss": 0.0218, |
| "step": 13260 |
| }, |
| { |
| "epoch": 9.164364640883978, |
| "grad_norm": 0.1920802891254425, |
| "learning_rate": 2.7899160231650056e-05, |
| "loss": 0.0229, |
| "step": 13270 |
| }, |
| { |
| "epoch": 9.171270718232044, |
| "grad_norm": 0.10271954536437988, |
| "learning_rate": 2.7825031795513585e-05, |
| "loss": 0.0242, |
| "step": 13280 |
| }, |
| { |
| "epoch": 9.17817679558011, |
| "grad_norm": 0.16070392727851868, |
| "learning_rate": 2.775096398489341e-05, |
| "loss": 0.0214, |
| "step": 13290 |
| }, |
| { |
| "epoch": 9.185082872928177, |
| "grad_norm": 0.1328756958246231, |
| "learning_rate": 2.7676957002288163e-05, |
| "loss": 0.0268, |
| "step": 13300 |
| }, |
| { |
| "epoch": 9.191988950276244, |
| "grad_norm": 0.11246970295906067, |
| "learning_rate": 2.760301105003003e-05, |
| "loss": 0.0208, |
| "step": 13310 |
| }, |
| { |
| "epoch": 9.19889502762431, |
| "grad_norm": 0.149558886885643, |
| "learning_rate": 2.752912633028446e-05, |
| "loss": 0.019, |
| "step": 13320 |
| }, |
| { |
| "epoch": 9.205801104972375, |
| "grad_norm": 0.14351671934127808, |
| "learning_rate": 2.7455303045049474e-05, |
| "loss": 0.0237, |
| "step": 13330 |
| }, |
| { |
| "epoch": 9.212707182320441, |
| "grad_norm": 0.15685635805130005, |
| "learning_rate": 2.7381541396155098e-05, |
| "loss": 0.0219, |
| "step": 13340 |
| }, |
| { |
| "epoch": 9.219613259668508, |
| "grad_norm": 0.19899433851242065, |
| "learning_rate": 2.730784158526286e-05, |
| "loss": 0.022, |
| "step": 13350 |
| }, |
| { |
| "epoch": 9.226519337016574, |
| "grad_norm": 0.13601619005203247, |
| "learning_rate": 2.723420381386521e-05, |
| "loss": 0.0208, |
| "step": 13360 |
| }, |
| { |
| "epoch": 9.23342541436464, |
| "grad_norm": 0.14668749272823334, |
| "learning_rate": 2.7160628283285018e-05, |
| "loss": 0.0205, |
| "step": 13370 |
| }, |
| { |
| "epoch": 9.240331491712707, |
| "grad_norm": 0.15259882807731628, |
| "learning_rate": 2.7087115194675007e-05, |
| "loss": 0.0209, |
| "step": 13380 |
| }, |
| { |
| "epoch": 9.247237569060774, |
| "grad_norm": 0.14573076367378235, |
| "learning_rate": 2.701366474901712e-05, |
| "loss": 0.0216, |
| "step": 13390 |
| }, |
| { |
| "epoch": 9.25414364640884, |
| "grad_norm": 0.12216416746377945, |
| "learning_rate": 2.6940277147122085e-05, |
| "loss": 0.0176, |
| "step": 13400 |
| }, |
| { |
| "epoch": 9.261049723756907, |
| "grad_norm": 0.16968750953674316, |
| "learning_rate": 2.686695258962878e-05, |
| "loss": 0.0211, |
| "step": 13410 |
| }, |
| { |
| "epoch": 9.267955801104973, |
| "grad_norm": 0.19388921558856964, |
| "learning_rate": 2.679369127700375e-05, |
| "loss": 0.0219, |
| "step": 13420 |
| }, |
| { |
| "epoch": 9.274861878453038, |
| "grad_norm": 0.15601925551891327, |
| "learning_rate": 2.672049340954067e-05, |
| "loss": 0.0254, |
| "step": 13430 |
| }, |
| { |
| "epoch": 9.281767955801104, |
| "grad_norm": 0.14723803102970123, |
| "learning_rate": 2.6647359187359676e-05, |
| "loss": 0.021, |
| "step": 13440 |
| }, |
| { |
| "epoch": 9.28867403314917, |
| "grad_norm": 0.1670716404914856, |
| "learning_rate": 2.6574288810406946e-05, |
| "loss": 0.0259, |
| "step": 13450 |
| }, |
| { |
| "epoch": 9.295580110497237, |
| "grad_norm": 0.1267160326242447, |
| "learning_rate": 2.6501282478454083e-05, |
| "loss": 0.0196, |
| "step": 13460 |
| }, |
| { |
| "epoch": 9.302486187845304, |
| "grad_norm": 0.18623648583889008, |
| "learning_rate": 2.6428340391097618e-05, |
| "loss": 0.0249, |
| "step": 13470 |
| }, |
| { |
| "epoch": 9.30939226519337, |
| "grad_norm": 0.15965907275676727, |
| "learning_rate": 2.6355462747758485e-05, |
| "loss": 0.0171, |
| "step": 13480 |
| }, |
| { |
| "epoch": 9.316298342541437, |
| "grad_norm": 0.13149818778038025, |
| "learning_rate": 2.6282649747681304e-05, |
| "loss": 0.0229, |
| "step": 13490 |
| }, |
| { |
| "epoch": 9.323204419889503, |
| "grad_norm": 0.11458833515644073, |
| "learning_rate": 2.620990158993406e-05, |
| "loss": 0.0257, |
| "step": 13500 |
| }, |
| { |
| "epoch": 9.33011049723757, |
| "grad_norm": 0.12951171398162842, |
| "learning_rate": 2.6137218473407477e-05, |
| "loss": 0.0231, |
| "step": 13510 |
| }, |
| { |
| "epoch": 9.337016574585636, |
| "grad_norm": 0.14793288707733154, |
| "learning_rate": 2.606460059681436e-05, |
| "loss": 0.0185, |
| "step": 13520 |
| }, |
| { |
| "epoch": 9.3439226519337, |
| "grad_norm": 0.12049033492803574, |
| "learning_rate": 2.599204815868928e-05, |
| "loss": 0.0178, |
| "step": 13530 |
| }, |
| { |
| "epoch": 9.350828729281767, |
| "grad_norm": 0.17162886261940002, |
| "learning_rate": 2.5919561357387756e-05, |
| "loss": 0.023, |
| "step": 13540 |
| }, |
| { |
| "epoch": 9.357734806629834, |
| "grad_norm": 0.12784846127033234, |
| "learning_rate": 2.5847140391085972e-05, |
| "loss": 0.0243, |
| "step": 13550 |
| }, |
| { |
| "epoch": 9.3646408839779, |
| "grad_norm": 0.12364140897989273, |
| "learning_rate": 2.5774785457780103e-05, |
| "loss": 0.0229, |
| "step": 13560 |
| }, |
| { |
| "epoch": 9.371546961325967, |
| "grad_norm": 0.0785508081316948, |
| "learning_rate": 2.5702496755285753e-05, |
| "loss": 0.0194, |
| "step": 13570 |
| }, |
| { |
| "epoch": 9.378453038674033, |
| "grad_norm": 0.15732654929161072, |
| "learning_rate": 2.5630274481237483e-05, |
| "loss": 0.0246, |
| "step": 13580 |
| }, |
| { |
| "epoch": 9.3853591160221, |
| "grad_norm": 0.12818360328674316, |
| "learning_rate": 2.5558118833088197e-05, |
| "loss": 0.0222, |
| "step": 13590 |
| }, |
| { |
| "epoch": 9.392265193370166, |
| "grad_norm": 0.10840357095003128, |
| "learning_rate": 2.548603000810872e-05, |
| "loss": 0.023, |
| "step": 13600 |
| }, |
| { |
| "epoch": 9.399171270718233, |
| "grad_norm": 0.12543641030788422, |
| "learning_rate": 2.5414008203387152e-05, |
| "loss": 0.0198, |
| "step": 13610 |
| }, |
| { |
| "epoch": 9.4060773480663, |
| "grad_norm": 0.12083574384450912, |
| "learning_rate": 2.534205361582834e-05, |
| "loss": 0.0237, |
| "step": 13620 |
| }, |
| { |
| "epoch": 9.412983425414364, |
| "grad_norm": 0.09807329624891281, |
| "learning_rate": 2.527016644215338e-05, |
| "loss": 0.0224, |
| "step": 13630 |
| }, |
| { |
| "epoch": 9.41988950276243, |
| "grad_norm": 0.16850584745407104, |
| "learning_rate": 2.519834687889905e-05, |
| "loss": 0.0248, |
| "step": 13640 |
| }, |
| { |
| "epoch": 9.426795580110497, |
| "grad_norm": 0.11073677241802216, |
| "learning_rate": 2.5126595122417295e-05, |
| "loss": 0.0183, |
| "step": 13650 |
| }, |
| { |
| "epoch": 9.433701657458563, |
| "grad_norm": 0.12772704660892487, |
| "learning_rate": 2.5054911368874713e-05, |
| "loss": 0.0248, |
| "step": 13660 |
| }, |
| { |
| "epoch": 9.44060773480663, |
| "grad_norm": 0.13772784173488617, |
| "learning_rate": 2.4983295814251916e-05, |
| "loss": 0.0179, |
| "step": 13670 |
| }, |
| { |
| "epoch": 9.447513812154696, |
| "grad_norm": 0.1286805272102356, |
| "learning_rate": 2.4911748654343105e-05, |
| "loss": 0.021, |
| "step": 13680 |
| }, |
| { |
| "epoch": 9.454419889502763, |
| "grad_norm": 0.14480936527252197, |
| "learning_rate": 2.4840270084755463e-05, |
| "loss": 0.0242, |
| "step": 13690 |
| }, |
| { |
| "epoch": 9.46132596685083, |
| "grad_norm": 0.22492210566997528, |
| "learning_rate": 2.4768860300908685e-05, |
| "loss": 0.0227, |
| "step": 13700 |
| }, |
| { |
| "epoch": 9.468232044198896, |
| "grad_norm": 0.15835162997245789, |
| "learning_rate": 2.469751949803443e-05, |
| "loss": 0.0222, |
| "step": 13710 |
| }, |
| { |
| "epoch": 9.475138121546962, |
| "grad_norm": 0.1333426535129547, |
| "learning_rate": 2.4626247871175666e-05, |
| "loss": 0.018, |
| "step": 13720 |
| }, |
| { |
| "epoch": 9.482044198895027, |
| "grad_norm": 0.06573522090911865, |
| "learning_rate": 2.4555045615186346e-05, |
| "loss": 0.0217, |
| "step": 13730 |
| }, |
| { |
| "epoch": 9.488950276243093, |
| "grad_norm": 0.13375243544578552, |
| "learning_rate": 2.4483912924730677e-05, |
| "loss": 0.0205, |
| "step": 13740 |
| }, |
| { |
| "epoch": 9.49585635359116, |
| "grad_norm": 0.14595822989940643, |
| "learning_rate": 2.4412849994282742e-05, |
| "loss": 0.0242, |
| "step": 13750 |
| }, |
| { |
| "epoch": 9.502762430939226, |
| "grad_norm": 0.16502094268798828, |
| "learning_rate": 2.434185701812592e-05, |
| "loss": 0.0196, |
| "step": 13760 |
| }, |
| { |
| "epoch": 9.509668508287293, |
| "grad_norm": 0.15429244935512543, |
| "learning_rate": 2.4270934190352218e-05, |
| "loss": 0.0221, |
| "step": 13770 |
| }, |
| { |
| "epoch": 9.51657458563536, |
| "grad_norm": 0.12383496761322021, |
| "learning_rate": 2.4200081704861998e-05, |
| "loss": 0.0203, |
| "step": 13780 |
| }, |
| { |
| "epoch": 9.523480662983426, |
| "grad_norm": 0.1347363144159317, |
| "learning_rate": 2.412929975536321e-05, |
| "loss": 0.0205, |
| "step": 13790 |
| }, |
| { |
| "epoch": 9.530386740331492, |
| "grad_norm": 0.17767280340194702, |
| "learning_rate": 2.4058588535371017e-05, |
| "loss": 0.0241, |
| "step": 13800 |
| }, |
| { |
| "epoch": 9.537292817679559, |
| "grad_norm": 0.16614821553230286, |
| "learning_rate": 2.3987948238207243e-05, |
| "loss": 0.0261, |
| "step": 13810 |
| }, |
| { |
| "epoch": 9.544198895027625, |
| "grad_norm": 0.15801551938056946, |
| "learning_rate": 2.3917379056999678e-05, |
| "loss": 0.0212, |
| "step": 13820 |
| }, |
| { |
| "epoch": 9.55110497237569, |
| "grad_norm": 0.16081950068473816, |
| "learning_rate": 2.3846881184681824e-05, |
| "loss": 0.022, |
| "step": 13830 |
| }, |
| { |
| "epoch": 9.558011049723756, |
| "grad_norm": 0.18439306318759918, |
| "learning_rate": 2.377645481399214e-05, |
| "loss": 0.0217, |
| "step": 13840 |
| }, |
| { |
| "epoch": 9.564917127071823, |
| "grad_norm": 0.10785002261400223, |
| "learning_rate": 2.3706100137473667e-05, |
| "loss": 0.0198, |
| "step": 13850 |
| }, |
| { |
| "epoch": 9.57182320441989, |
| "grad_norm": 0.12533661723136902, |
| "learning_rate": 2.3635817347473394e-05, |
| "loss": 0.0208, |
| "step": 13860 |
| }, |
| { |
| "epoch": 9.578729281767956, |
| "grad_norm": 0.1306634098291397, |
| "learning_rate": 2.3565606636141757e-05, |
| "loss": 0.0175, |
| "step": 13870 |
| }, |
| { |
| "epoch": 9.585635359116022, |
| "grad_norm": 0.1544395536184311, |
| "learning_rate": 2.3495468195432203e-05, |
| "loss": 0.0192, |
| "step": 13880 |
| }, |
| { |
| "epoch": 9.592541436464089, |
| "grad_norm": 0.0991256982088089, |
| "learning_rate": 2.3425402217100507e-05, |
| "loss": 0.02, |
| "step": 13890 |
| }, |
| { |
| "epoch": 9.599447513812155, |
| "grad_norm": 0.09305687248706818, |
| "learning_rate": 2.3355408892704424e-05, |
| "loss": 0.021, |
| "step": 13900 |
| }, |
| { |
| "epoch": 9.606353591160222, |
| "grad_norm": 0.14391951262950897, |
| "learning_rate": 2.3285488413603003e-05, |
| "loss": 0.0193, |
| "step": 13910 |
| }, |
| { |
| "epoch": 9.613259668508288, |
| "grad_norm": 0.17865988612174988, |
| "learning_rate": 2.321564097095615e-05, |
| "loss": 0.0215, |
| "step": 13920 |
| }, |
| { |
| "epoch": 9.620165745856355, |
| "grad_norm": 0.10739440470933914, |
| "learning_rate": 2.3145866755724142e-05, |
| "loss": 0.0228, |
| "step": 13930 |
| }, |
| { |
| "epoch": 9.62707182320442, |
| "grad_norm": 0.14161740243434906, |
| "learning_rate": 2.307616595866699e-05, |
| "loss": 0.0189, |
| "step": 13940 |
| }, |
| { |
| "epoch": 9.633977900552486, |
| "grad_norm": 0.21180714666843414, |
| "learning_rate": 2.3006538770344032e-05, |
| "loss": 0.0233, |
| "step": 13950 |
| }, |
| { |
| "epoch": 9.640883977900552, |
| "grad_norm": 0.11193030327558517, |
| "learning_rate": 2.293698538111334e-05, |
| "loss": 0.0205, |
| "step": 13960 |
| }, |
| { |
| "epoch": 9.647790055248619, |
| "grad_norm": 0.2148815244436264, |
| "learning_rate": 2.28675059811312e-05, |
| "loss": 0.0193, |
| "step": 13970 |
| }, |
| { |
| "epoch": 9.654696132596685, |
| "grad_norm": 0.13390229642391205, |
| "learning_rate": 2.279810076035167e-05, |
| "loss": 0.02, |
| "step": 13980 |
| }, |
| { |
| "epoch": 9.661602209944752, |
| "grad_norm": 0.0927250012755394, |
| "learning_rate": 2.272876990852596e-05, |
| "loss": 0.0197, |
| "step": 13990 |
| }, |
| { |
| "epoch": 9.668508287292818, |
| "grad_norm": 0.12975889444351196, |
| "learning_rate": 2.265951361520195e-05, |
| "loss": 0.0203, |
| "step": 14000 |
| }, |
| { |
| "epoch": 9.675414364640885, |
| "grad_norm": 0.1706344038248062, |
| "learning_rate": 2.2590332069723748e-05, |
| "loss": 0.019, |
| "step": 14010 |
| }, |
| { |
| "epoch": 9.682320441988951, |
| "grad_norm": 0.21264302730560303, |
| "learning_rate": 2.2521225461231004e-05, |
| "loss": 0.0209, |
| "step": 14020 |
| }, |
| { |
| "epoch": 9.689226519337016, |
| "grad_norm": 0.14462417364120483, |
| "learning_rate": 2.2452193978658597e-05, |
| "loss": 0.0222, |
| "step": 14030 |
| }, |
| { |
| "epoch": 9.696132596685082, |
| "grad_norm": 0.12847472727298737, |
| "learning_rate": 2.238323781073594e-05, |
| "loss": 0.0194, |
| "step": 14040 |
| }, |
| { |
| "epoch": 9.703038674033149, |
| "grad_norm": 0.1844336837530136, |
| "learning_rate": 2.2314357145986552e-05, |
| "loss": 0.0198, |
| "step": 14050 |
| }, |
| { |
| "epoch": 9.709944751381215, |
| "grad_norm": 0.16759341955184937, |
| "learning_rate": 2.224555217272757e-05, |
| "loss": 0.0194, |
| "step": 14060 |
| }, |
| { |
| "epoch": 9.716850828729282, |
| "grad_norm": 0.10547558218240738, |
| "learning_rate": 2.2176823079069127e-05, |
| "loss": 0.0214, |
| "step": 14070 |
| }, |
| { |
| "epoch": 9.723756906077348, |
| "grad_norm": 0.11423495411872864, |
| "learning_rate": 2.210817005291398e-05, |
| "loss": 0.0208, |
| "step": 14080 |
| }, |
| { |
| "epoch": 9.730662983425415, |
| "grad_norm": 0.13519664108753204, |
| "learning_rate": 2.203959328195686e-05, |
| "loss": 0.0195, |
| "step": 14090 |
| }, |
| { |
| "epoch": 9.737569060773481, |
| "grad_norm": 0.14309315383434296, |
| "learning_rate": 2.1971092953684026e-05, |
| "loss": 0.0192, |
| "step": 14100 |
| }, |
| { |
| "epoch": 9.744475138121548, |
| "grad_norm": 0.10871468484401703, |
| "learning_rate": 2.1902669255372788e-05, |
| "loss": 0.0171, |
| "step": 14110 |
| }, |
| { |
| "epoch": 9.751381215469614, |
| "grad_norm": 0.15495355427265167, |
| "learning_rate": 2.1834322374090897e-05, |
| "loss": 0.0203, |
| "step": 14120 |
| }, |
| { |
| "epoch": 9.75828729281768, |
| "grad_norm": 0.10269203782081604, |
| "learning_rate": 2.1766052496696153e-05, |
| "loss": 0.0189, |
| "step": 14130 |
| }, |
| { |
| "epoch": 9.765193370165745, |
| "grad_norm": 0.17774659395217896, |
| "learning_rate": 2.169785980983577e-05, |
| "loss": 0.0239, |
| "step": 14140 |
| }, |
| { |
| "epoch": 9.772099447513812, |
| "grad_norm": 0.12673352658748627, |
| "learning_rate": 2.162974449994593e-05, |
| "loss": 0.02, |
| "step": 14150 |
| }, |
| { |
| "epoch": 9.779005524861878, |
| "grad_norm": 0.12161792069673538, |
| "learning_rate": 2.1561706753251337e-05, |
| "loss": 0.0195, |
| "step": 14160 |
| }, |
| { |
| "epoch": 9.785911602209945, |
| "grad_norm": 0.08473629504442215, |
| "learning_rate": 2.1493746755764544e-05, |
| "loss": 0.0208, |
| "step": 14170 |
| }, |
| { |
| "epoch": 9.792817679558011, |
| "grad_norm": 0.16699481010437012, |
| "learning_rate": 2.1425864693285635e-05, |
| "loss": 0.0224, |
| "step": 14180 |
| }, |
| { |
| "epoch": 9.799723756906078, |
| "grad_norm": 0.11192629486322403, |
| "learning_rate": 2.1358060751401547e-05, |
| "loss": 0.0211, |
| "step": 14190 |
| }, |
| { |
| "epoch": 9.806629834254144, |
| "grad_norm": 0.16285032033920288, |
| "learning_rate": 2.129033511548566e-05, |
| "loss": 0.0197, |
| "step": 14200 |
| }, |
| { |
| "epoch": 9.81353591160221, |
| "grad_norm": 0.117989681661129, |
| "learning_rate": 2.1222687970697315e-05, |
| "loss": 0.0196, |
| "step": 14210 |
| }, |
| { |
| "epoch": 9.820441988950277, |
| "grad_norm": 0.13115745782852173, |
| "learning_rate": 2.1155119501981173e-05, |
| "loss": 0.0244, |
| "step": 14220 |
| }, |
| { |
| "epoch": 9.827348066298342, |
| "grad_norm": 0.2276378720998764, |
| "learning_rate": 2.1087629894066895e-05, |
| "loss": 0.0225, |
| "step": 14230 |
| }, |
| { |
| "epoch": 9.834254143646408, |
| "grad_norm": 0.09082102030515671, |
| "learning_rate": 2.1020219331468473e-05, |
| "loss": 0.0177, |
| "step": 14240 |
| }, |
| { |
| "epoch": 9.841160220994475, |
| "grad_norm": 0.17138603329658508, |
| "learning_rate": 2.095288799848379e-05, |
| "loss": 0.0239, |
| "step": 14250 |
| }, |
| { |
| "epoch": 9.848066298342541, |
| "grad_norm": 0.07798614352941513, |
| "learning_rate": 2.088563607919417e-05, |
| "loss": 0.0206, |
| "step": 14260 |
| }, |
| { |
| "epoch": 9.854972375690608, |
| "grad_norm": 0.14047075808048248, |
| "learning_rate": 2.0818463757463786e-05, |
| "loss": 0.018, |
| "step": 14270 |
| }, |
| { |
| "epoch": 9.861878453038674, |
| "grad_norm": 0.17085087299346924, |
| "learning_rate": 2.0751371216939175e-05, |
| "loss": 0.021, |
| "step": 14280 |
| }, |
| { |
| "epoch": 9.86878453038674, |
| "grad_norm": 0.14489038288593292, |
| "learning_rate": 2.068435864104882e-05, |
| "loss": 0.0159, |
| "step": 14290 |
| }, |
| { |
| "epoch": 9.875690607734807, |
| "grad_norm": 0.11485548317432404, |
| "learning_rate": 2.0617426213002506e-05, |
| "loss": 0.0179, |
| "step": 14300 |
| }, |
| { |
| "epoch": 9.882596685082873, |
| "grad_norm": 0.12750490009784698, |
| "learning_rate": 2.055057411579097e-05, |
| "loss": 0.017, |
| "step": 14310 |
| }, |
| { |
| "epoch": 9.88950276243094, |
| "grad_norm": 0.17045387625694275, |
| "learning_rate": 2.0483802532185286e-05, |
| "loss": 0.0238, |
| "step": 14320 |
| }, |
| { |
| "epoch": 9.896408839779006, |
| "grad_norm": 0.1332184374332428, |
| "learning_rate": 2.041711164473638e-05, |
| "loss": 0.0226, |
| "step": 14330 |
| }, |
| { |
| "epoch": 9.903314917127071, |
| "grad_norm": 0.16046547889709473, |
| "learning_rate": 2.0350501635774637e-05, |
| "loss": 0.0191, |
| "step": 14340 |
| }, |
| { |
| "epoch": 9.910220994475138, |
| "grad_norm": 0.15064607560634613, |
| "learning_rate": 2.0283972687409247e-05, |
| "loss": 0.0225, |
| "step": 14350 |
| }, |
| { |
| "epoch": 9.917127071823204, |
| "grad_norm": 0.11720279604196548, |
| "learning_rate": 2.021752498152784e-05, |
| "loss": 0.0215, |
| "step": 14360 |
| }, |
| { |
| "epoch": 9.92403314917127, |
| "grad_norm": 0.16652604937553406, |
| "learning_rate": 2.015115869979589e-05, |
| "loss": 0.0258, |
| "step": 14370 |
| }, |
| { |
| "epoch": 9.930939226519337, |
| "grad_norm": 0.12095716595649719, |
| "learning_rate": 2.0084874023656265e-05, |
| "loss": 0.022, |
| "step": 14380 |
| }, |
| { |
| "epoch": 9.937845303867404, |
| "grad_norm": 0.0894702821969986, |
| "learning_rate": 2.001867113432877e-05, |
| "loss": 0.0218, |
| "step": 14390 |
| }, |
| { |
| "epoch": 9.94475138121547, |
| "grad_norm": 0.20566357672214508, |
| "learning_rate": 1.995255021280954e-05, |
| "loss": 0.0187, |
| "step": 14400 |
| }, |
| { |
| "epoch": 9.951657458563536, |
| "grad_norm": 0.11899957805871964, |
| "learning_rate": 1.9886511439870688e-05, |
| "loss": 0.0217, |
| "step": 14410 |
| }, |
| { |
| "epoch": 9.958563535911603, |
| "grad_norm": 0.1229172870516777, |
| "learning_rate": 1.9820554996059675e-05, |
| "loss": 0.0197, |
| "step": 14420 |
| }, |
| { |
| "epoch": 9.965469613259668, |
| "grad_norm": 0.16941682994365692, |
| "learning_rate": 1.9754681061698893e-05, |
| "loss": 0.018, |
| "step": 14430 |
| }, |
| { |
| "epoch": 9.972375690607734, |
| "grad_norm": 0.13610994815826416, |
| "learning_rate": 1.9688889816885185e-05, |
| "loss": 0.0182, |
| "step": 14440 |
| }, |
| { |
| "epoch": 9.9792817679558, |
| "grad_norm": 0.14665129780769348, |
| "learning_rate": 1.962318144148928e-05, |
| "loss": 0.02, |
| "step": 14450 |
| }, |
| { |
| "epoch": 9.986187845303867, |
| "grad_norm": 0.13639798760414124, |
| "learning_rate": 1.955755611515539e-05, |
| "loss": 0.0269, |
| "step": 14460 |
| }, |
| { |
| "epoch": 9.993093922651934, |
| "grad_norm": 0.1341027170419693, |
| "learning_rate": 1.9492014017300642e-05, |
| "loss": 0.0203, |
| "step": 14470 |
| }, |
| { |
| "epoch": 10.0, |
| "grad_norm": 0.20185670256614685, |
| "learning_rate": 1.942655532711461e-05, |
| "loss": 0.0208, |
| "step": 14480 |
| }, |
| { |
| "epoch": 10.006906077348066, |
| "grad_norm": 0.09617722779512405, |
| "learning_rate": 1.9361180223558882e-05, |
| "loss": 0.0225, |
| "step": 14490 |
| }, |
| { |
| "epoch": 10.013812154696133, |
| "grad_norm": 0.08813314139842987, |
| "learning_rate": 1.929588888536647e-05, |
| "loss": 0.0171, |
| "step": 14500 |
| }, |
| { |
| "epoch": 10.0207182320442, |
| "grad_norm": 0.10605496168136597, |
| "learning_rate": 1.9230681491041425e-05, |
| "loss": 0.0175, |
| "step": 14510 |
| }, |
| { |
| "epoch": 10.027624309392266, |
| "grad_norm": 0.12312556058168411, |
| "learning_rate": 1.9165558218858264e-05, |
| "loss": 0.0174, |
| "step": 14520 |
| }, |
| { |
| "epoch": 10.034530386740332, |
| "grad_norm": 0.13579706847667694, |
| "learning_rate": 1.9100519246861505e-05, |
| "loss": 0.0179, |
| "step": 14530 |
| }, |
| { |
| "epoch": 10.041436464088397, |
| "grad_norm": 0.15092580020427704, |
| "learning_rate": 1.9035564752865248e-05, |
| "loss": 0.0209, |
| "step": 14540 |
| }, |
| { |
| "epoch": 10.048342541436464, |
| "grad_norm": 0.15202099084854126, |
| "learning_rate": 1.897069491445258e-05, |
| "loss": 0.018, |
| "step": 14550 |
| }, |
| { |
| "epoch": 10.05524861878453, |
| "grad_norm": 0.1487421989440918, |
| "learning_rate": 1.890590990897515e-05, |
| "loss": 0.0234, |
| "step": 14560 |
| }, |
| { |
| "epoch": 10.062154696132596, |
| "grad_norm": 0.14006099104881287, |
| "learning_rate": 1.884120991355272e-05, |
| "loss": 0.0223, |
| "step": 14570 |
| }, |
| { |
| "epoch": 10.069060773480663, |
| "grad_norm": 0.09489653259515762, |
| "learning_rate": 1.8776595105072576e-05, |
| "loss": 0.0203, |
| "step": 14580 |
| }, |
| { |
| "epoch": 10.07596685082873, |
| "grad_norm": 0.09173377603292465, |
| "learning_rate": 1.8712065660189166e-05, |
| "loss": 0.0186, |
| "step": 14590 |
| }, |
| { |
| "epoch": 10.082872928176796, |
| "grad_norm": 0.17884795367717743, |
| "learning_rate": 1.8647621755323513e-05, |
| "loss": 0.0182, |
| "step": 14600 |
| }, |
| { |
| "epoch": 10.089779005524862, |
| "grad_norm": 0.1608448177576065, |
| "learning_rate": 1.858326356666278e-05, |
| "loss": 0.0194, |
| "step": 14610 |
| }, |
| { |
| "epoch": 10.096685082872929, |
| "grad_norm": 0.11083773523569107, |
| "learning_rate": 1.851899127015983e-05, |
| "loss": 0.0197, |
| "step": 14620 |
| }, |
| { |
| "epoch": 10.103591160220995, |
| "grad_norm": 0.1610211730003357, |
| "learning_rate": 1.8454805041532626e-05, |
| "loss": 0.0244, |
| "step": 14630 |
| }, |
| { |
| "epoch": 10.11049723756906, |
| "grad_norm": 0.16736485064029694, |
| "learning_rate": 1.8390705056263906e-05, |
| "loss": 0.0214, |
| "step": 14640 |
| }, |
| { |
| "epoch": 10.117403314917127, |
| "grad_norm": 0.16821861267089844, |
| "learning_rate": 1.832669148960057e-05, |
| "loss": 0.0209, |
| "step": 14650 |
| }, |
| { |
| "epoch": 10.124309392265193, |
| "grad_norm": 0.14552289247512817, |
| "learning_rate": 1.8262764516553233e-05, |
| "loss": 0.0197, |
| "step": 14660 |
| }, |
| { |
| "epoch": 10.13121546961326, |
| "grad_norm": 0.24827535450458527, |
| "learning_rate": 1.8198924311895843e-05, |
| "loss": 0.0173, |
| "step": 14670 |
| }, |
| { |
| "epoch": 10.138121546961326, |
| "grad_norm": 0.19792486727237701, |
| "learning_rate": 1.813517105016505e-05, |
| "loss": 0.0222, |
| "step": 14680 |
| }, |
| { |
| "epoch": 10.145027624309392, |
| "grad_norm": 0.1277136504650116, |
| "learning_rate": 1.8071504905659888e-05, |
| "loss": 0.0168, |
| "step": 14690 |
| }, |
| { |
| "epoch": 10.151933701657459, |
| "grad_norm": 0.11872461438179016, |
| "learning_rate": 1.800792605244109e-05, |
| "loss": 0.0197, |
| "step": 14700 |
| }, |
| { |
| "epoch": 10.158839779005525, |
| "grad_norm": 0.16362428665161133, |
| "learning_rate": 1.7944434664330844e-05, |
| "loss": 0.0286, |
| "step": 14710 |
| }, |
| { |
| "epoch": 10.165745856353592, |
| "grad_norm": 0.1249469742178917, |
| "learning_rate": 1.7881030914912212e-05, |
| "loss": 0.0228, |
| "step": 14720 |
| }, |
| { |
| "epoch": 10.172651933701658, |
| "grad_norm": 0.1482723355293274, |
| "learning_rate": 1.7817714977528577e-05, |
| "loss": 0.0206, |
| "step": 14730 |
| }, |
| { |
| "epoch": 10.179558011049723, |
| "grad_norm": 0.12862233817577362, |
| "learning_rate": 1.7754487025283332e-05, |
| "loss": 0.0246, |
| "step": 14740 |
| }, |
| { |
| "epoch": 10.18646408839779, |
| "grad_norm": 0.14729537069797516, |
| "learning_rate": 1.7691347231039275e-05, |
| "loss": 0.0197, |
| "step": 14750 |
| }, |
| { |
| "epoch": 10.193370165745856, |
| "grad_norm": 0.16883131861686707, |
| "learning_rate": 1.7628295767418164e-05, |
| "loss": 0.0228, |
| "step": 14760 |
| }, |
| { |
| "epoch": 10.200276243093922, |
| "grad_norm": 0.15366175770759583, |
| "learning_rate": 1.7565332806800333e-05, |
| "loss": 0.0232, |
| "step": 14770 |
| }, |
| { |
| "epoch": 10.207182320441989, |
| "grad_norm": 0.15509574115276337, |
| "learning_rate": 1.750245852132408e-05, |
| "loss": 0.0203, |
| "step": 14780 |
| }, |
| { |
| "epoch": 10.214088397790055, |
| "grad_norm": 0.14112603664398193, |
| "learning_rate": 1.7439673082885323e-05, |
| "loss": 0.019, |
| "step": 14790 |
| }, |
| { |
| "epoch": 10.220994475138122, |
| "grad_norm": 0.09568461030721664, |
| "learning_rate": 1.7376976663137047e-05, |
| "loss": 0.0193, |
| "step": 14800 |
| }, |
| { |
| "epoch": 10.227900552486188, |
| "grad_norm": 0.12783320248126984, |
| "learning_rate": 1.7314369433488853e-05, |
| "loss": 0.0176, |
| "step": 14810 |
| }, |
| { |
| "epoch": 10.234806629834255, |
| "grad_norm": 0.09211356937885284, |
| "learning_rate": 1.7251851565106548e-05, |
| "loss": 0.0177, |
| "step": 14820 |
| }, |
| { |
| "epoch": 10.241712707182321, |
| "grad_norm": 0.15022572875022888, |
| "learning_rate": 1.7189423228911574e-05, |
| "loss": 0.0205, |
| "step": 14830 |
| }, |
| { |
| "epoch": 10.248618784530386, |
| "grad_norm": 0.09745365381240845, |
| "learning_rate": 1.7127084595580606e-05, |
| "loss": 0.0186, |
| "step": 14840 |
| }, |
| { |
| "epoch": 10.255524861878452, |
| "grad_norm": 0.16210335493087769, |
| "learning_rate": 1.706483583554513e-05, |
| "loss": 0.0207, |
| "step": 14850 |
| }, |
| { |
| "epoch": 10.262430939226519, |
| "grad_norm": 0.13680025935173035, |
| "learning_rate": 1.700267711899083e-05, |
| "loss": 0.0204, |
| "step": 14860 |
| }, |
| { |
| "epoch": 10.269337016574585, |
| "grad_norm": 0.11462069302797318, |
| "learning_rate": 1.69406086158573e-05, |
| "loss": 0.0181, |
| "step": 14870 |
| }, |
| { |
| "epoch": 10.276243093922652, |
| "grad_norm": 0.12634550034999847, |
| "learning_rate": 1.6878630495837455e-05, |
| "loss": 0.0207, |
| "step": 14880 |
| }, |
| { |
| "epoch": 10.283149171270718, |
| "grad_norm": 0.16681039333343506, |
| "learning_rate": 1.681674292837707e-05, |
| "loss": 0.02, |
| "step": 14890 |
| }, |
| { |
| "epoch": 10.290055248618785, |
| "grad_norm": 0.11668930947780609, |
| "learning_rate": 1.6754946082674444e-05, |
| "loss": 0.0203, |
| "step": 14900 |
| }, |
| { |
| "epoch": 10.296961325966851, |
| "grad_norm": 0.17742110788822174, |
| "learning_rate": 1.6693240127679748e-05, |
| "loss": 0.018, |
| "step": 14910 |
| }, |
| { |
| "epoch": 10.303867403314918, |
| "grad_norm": 0.13763536512851715, |
| "learning_rate": 1.663162523209475e-05, |
| "loss": 0.0254, |
| "step": 14920 |
| }, |
| { |
| "epoch": 10.310773480662984, |
| "grad_norm": 0.07589291781187057, |
| "learning_rate": 1.6570101564372193e-05, |
| "loss": 0.0219, |
| "step": 14930 |
| }, |
| { |
| "epoch": 10.317679558011049, |
| "grad_norm": 0.14768086373806, |
| "learning_rate": 1.650866929271543e-05, |
| "loss": 0.0206, |
| "step": 14940 |
| }, |
| { |
| "epoch": 10.324585635359115, |
| "grad_norm": 0.1339852213859558, |
| "learning_rate": 1.644732858507797e-05, |
| "loss": 0.0208, |
| "step": 14950 |
| }, |
| { |
| "epoch": 10.331491712707182, |
| "grad_norm": 0.15526872873306274, |
| "learning_rate": 1.6386079609162943e-05, |
| "loss": 0.0179, |
| "step": 14960 |
| }, |
| { |
| "epoch": 10.338397790055248, |
| "grad_norm": 0.08692698180675507, |
| "learning_rate": 1.6324922532422742e-05, |
| "loss": 0.015, |
| "step": 14970 |
| }, |
| { |
| "epoch": 10.345303867403315, |
| "grad_norm": 0.12334548681974411, |
| "learning_rate": 1.6263857522058434e-05, |
| "loss": 0.0179, |
| "step": 14980 |
| }, |
| { |
| "epoch": 10.352209944751381, |
| "grad_norm": 0.15830978751182556, |
| "learning_rate": 1.6202884745019443e-05, |
| "loss": 0.0196, |
| "step": 14990 |
| }, |
| { |
| "epoch": 10.359116022099448, |
| "grad_norm": 0.14715401828289032, |
| "learning_rate": 1.614200436800304e-05, |
| "loss": 0.0188, |
| "step": 15000 |
| }, |
| { |
| "epoch": 10.366022099447514, |
| "grad_norm": 0.13705182075500488, |
| "learning_rate": 1.6081216557453814e-05, |
| "loss": 0.0179, |
| "step": 15010 |
| }, |
| { |
| "epoch": 10.37292817679558, |
| "grad_norm": 0.09494274854660034, |
| "learning_rate": 1.6020521479563367e-05, |
| "loss": 0.0169, |
| "step": 15020 |
| }, |
| { |
| "epoch": 10.379834254143647, |
| "grad_norm": 0.16117554903030396, |
| "learning_rate": 1.5959919300269654e-05, |
| "loss": 0.0225, |
| "step": 15030 |
| }, |
| { |
| "epoch": 10.386740331491712, |
| "grad_norm": 0.12154057621955872, |
| "learning_rate": 1.5899410185256764e-05, |
| "loss": 0.0198, |
| "step": 15040 |
| }, |
| { |
| "epoch": 10.393646408839778, |
| "grad_norm": 0.15817032754421234, |
| "learning_rate": 1.583899429995431e-05, |
| "loss": 0.0231, |
| "step": 15050 |
| }, |
| { |
| "epoch": 10.400552486187845, |
| "grad_norm": 0.20743706822395325, |
| "learning_rate": 1.5778671809536993e-05, |
| "loss": 0.0198, |
| "step": 15060 |
| }, |
| { |
| "epoch": 10.407458563535911, |
| "grad_norm": 0.10531453043222427, |
| "learning_rate": 1.5718442878924246e-05, |
| "loss": 0.02, |
| "step": 15070 |
| }, |
| { |
| "epoch": 10.414364640883978, |
| "grad_norm": 0.13929533958435059, |
| "learning_rate": 1.5658307672779593e-05, |
| "loss": 0.016, |
| "step": 15080 |
| }, |
| { |
| "epoch": 10.421270718232044, |
| "grad_norm": 0.13578106462955475, |
| "learning_rate": 1.5598266355510427e-05, |
| "loss": 0.0199, |
| "step": 15090 |
| }, |
| { |
| "epoch": 10.42817679558011, |
| "grad_norm": 0.19008758664131165, |
| "learning_rate": 1.553831909126744e-05, |
| "loss": 0.0187, |
| "step": 15100 |
| }, |
| { |
| "epoch": 10.435082872928177, |
| "grad_norm": 0.14955757558345795, |
| "learning_rate": 1.5478466043944135e-05, |
| "loss": 0.0212, |
| "step": 15110 |
| }, |
| { |
| "epoch": 10.441988950276244, |
| "grad_norm": 0.1234457865357399, |
| "learning_rate": 1.5418707377176468e-05, |
| "loss": 0.0266, |
| "step": 15120 |
| }, |
| { |
| "epoch": 10.44889502762431, |
| "grad_norm": 0.1302083134651184, |
| "learning_rate": 1.535904325434233e-05, |
| "loss": 0.0185, |
| "step": 15130 |
| }, |
| { |
| "epoch": 10.455801104972375, |
| "grad_norm": 0.09352464973926544, |
| "learning_rate": 1.529947383856118e-05, |
| "loss": 0.019, |
| "step": 15140 |
| }, |
| { |
| "epoch": 10.462707182320441, |
| "grad_norm": 0.20320549607276917, |
| "learning_rate": 1.5239999292693524e-05, |
| "loss": 0.0179, |
| "step": 15150 |
| }, |
| { |
| "epoch": 10.469613259668508, |
| "grad_norm": 0.13011567294597626, |
| "learning_rate": 1.5180619779340505e-05, |
| "loss": 0.017, |
| "step": 15160 |
| }, |
| { |
| "epoch": 10.476519337016574, |
| "grad_norm": 0.1492019146680832, |
| "learning_rate": 1.5121335460843428e-05, |
| "loss": 0.0189, |
| "step": 15170 |
| }, |
| { |
| "epoch": 10.48342541436464, |
| "grad_norm": 0.12916570901870728, |
| "learning_rate": 1.5062146499283347e-05, |
| "loss": 0.0194, |
| "step": 15180 |
| }, |
| { |
| "epoch": 10.490331491712707, |
| "grad_norm": 0.16759514808654785, |
| "learning_rate": 1.5003053056480643e-05, |
| "loss": 0.0205, |
| "step": 15190 |
| }, |
| { |
| "epoch": 10.497237569060774, |
| "grad_norm": 0.13533750176429749, |
| "learning_rate": 1.4944055293994551e-05, |
| "loss": 0.0231, |
| "step": 15200 |
| }, |
| { |
| "epoch": 10.50414364640884, |
| "grad_norm": 0.14893312752246857, |
| "learning_rate": 1.4885153373122656e-05, |
| "loss": 0.0172, |
| "step": 15210 |
| }, |
| { |
| "epoch": 10.511049723756907, |
| "grad_norm": 0.10746175050735474, |
| "learning_rate": 1.482634745490059e-05, |
| "loss": 0.0159, |
| "step": 15220 |
| }, |
| { |
| "epoch": 10.517955801104973, |
| "grad_norm": 0.14947925508022308, |
| "learning_rate": 1.4767637700101466e-05, |
| "loss": 0.0188, |
| "step": 15230 |
| }, |
| { |
| "epoch": 10.524861878453038, |
| "grad_norm": 0.14596012234687805, |
| "learning_rate": 1.4709024269235528e-05, |
| "loss": 0.0204, |
| "step": 15240 |
| }, |
| { |
| "epoch": 10.531767955801104, |
| "grad_norm": 0.11377096176147461, |
| "learning_rate": 1.4650507322549684e-05, |
| "loss": 0.0213, |
| "step": 15250 |
| }, |
| { |
| "epoch": 10.53867403314917, |
| "grad_norm": 0.2057703733444214, |
| "learning_rate": 1.4592087020026972e-05, |
| "loss": 0.0188, |
| "step": 15260 |
| }, |
| { |
| "epoch": 10.545580110497237, |
| "grad_norm": 0.11566067487001419, |
| "learning_rate": 1.4533763521386318e-05, |
| "loss": 0.0197, |
| "step": 15270 |
| }, |
| { |
| "epoch": 10.552486187845304, |
| "grad_norm": 0.15610185265541077, |
| "learning_rate": 1.44755369860819e-05, |
| "loss": 0.0197, |
| "step": 15280 |
| }, |
| { |
| "epoch": 10.55939226519337, |
| "grad_norm": 0.11242447793483734, |
| "learning_rate": 1.441740757330287e-05, |
| "loss": 0.0202, |
| "step": 15290 |
| }, |
| { |
| "epoch": 10.566298342541437, |
| "grad_norm": 0.18600213527679443, |
| "learning_rate": 1.4359375441972844e-05, |
| "loss": 0.0233, |
| "step": 15300 |
| }, |
| { |
| "epoch": 10.573204419889503, |
| "grad_norm": 0.19395959377288818, |
| "learning_rate": 1.4301440750749395e-05, |
| "loss": 0.0213, |
| "step": 15310 |
| }, |
| { |
| "epoch": 10.58011049723757, |
| "grad_norm": 0.14071722328662872, |
| "learning_rate": 1.4243603658023808e-05, |
| "loss": 0.024, |
| "step": 15320 |
| }, |
| { |
| "epoch": 10.587016574585636, |
| "grad_norm": 0.16488364338874817, |
| "learning_rate": 1.4185864321920444e-05, |
| "loss": 0.026, |
| "step": 15330 |
| }, |
| { |
| "epoch": 10.5939226519337, |
| "grad_norm": 0.1464218646287918, |
| "learning_rate": 1.4128222900296485e-05, |
| "loss": 0.0211, |
| "step": 15340 |
| }, |
| { |
| "epoch": 10.600828729281767, |
| "grad_norm": 0.15647467970848083, |
| "learning_rate": 1.407067955074135e-05, |
| "loss": 0.0197, |
| "step": 15350 |
| }, |
| { |
| "epoch": 10.607734806629834, |
| "grad_norm": 0.11905504763126373, |
| "learning_rate": 1.4013234430576356e-05, |
| "loss": 0.0173, |
| "step": 15360 |
| }, |
| { |
| "epoch": 10.6146408839779, |
| "grad_norm": 0.09630614519119263, |
| "learning_rate": 1.3955887696854286e-05, |
| "loss": 0.0178, |
| "step": 15370 |
| }, |
| { |
| "epoch": 10.621546961325967, |
| "grad_norm": 0.15738511085510254, |
| "learning_rate": 1.38986395063589e-05, |
| "loss": 0.0189, |
| "step": 15380 |
| }, |
| { |
| "epoch": 10.628453038674033, |
| "grad_norm": 0.07597813010215759, |
| "learning_rate": 1.3841490015604597e-05, |
| "loss": 0.0179, |
| "step": 15390 |
| }, |
| { |
| "epoch": 10.6353591160221, |
| "grad_norm": 0.15570463240146637, |
| "learning_rate": 1.3784439380835879e-05, |
| "loss": 0.0257, |
| "step": 15400 |
| }, |
| { |
| "epoch": 10.642265193370166, |
| "grad_norm": 0.09338031709194183, |
| "learning_rate": 1.3727487758026986e-05, |
| "loss": 0.02, |
| "step": 15410 |
| }, |
| { |
| "epoch": 10.649171270718233, |
| "grad_norm": 0.14082670211791992, |
| "learning_rate": 1.3670635302881525e-05, |
| "loss": 0.0232, |
| "step": 15420 |
| }, |
| { |
| "epoch": 10.6560773480663, |
| "grad_norm": 0.13374818861484528, |
| "learning_rate": 1.3613882170831888e-05, |
| "loss": 0.0169, |
| "step": 15430 |
| }, |
| { |
| "epoch": 10.662983425414364, |
| "grad_norm": 0.13222579658031464, |
| "learning_rate": 1.355722851703901e-05, |
| "loss": 0.0191, |
| "step": 15440 |
| }, |
| { |
| "epoch": 10.66988950276243, |
| "grad_norm": 0.12320950627326965, |
| "learning_rate": 1.3500674496391814e-05, |
| "loss": 0.0195, |
| "step": 15450 |
| }, |
| { |
| "epoch": 10.676795580110497, |
| "grad_norm": 0.09267281740903854, |
| "learning_rate": 1.3444220263506795e-05, |
| "loss": 0.018, |
| "step": 15460 |
| }, |
| { |
| "epoch": 10.683701657458563, |
| "grad_norm": 0.14765200018882751, |
| "learning_rate": 1.3387865972727714e-05, |
| "loss": 0.0206, |
| "step": 15470 |
| }, |
| { |
| "epoch": 10.69060773480663, |
| "grad_norm": 0.14930613338947296, |
| "learning_rate": 1.3331611778125036e-05, |
| "loss": 0.0188, |
| "step": 15480 |
| }, |
| { |
| "epoch": 10.697513812154696, |
| "grad_norm": 0.1355215162038803, |
| "learning_rate": 1.3275457833495564e-05, |
| "loss": 0.0217, |
| "step": 15490 |
| }, |
| { |
| "epoch": 10.704419889502763, |
| "grad_norm": 0.13247044384479523, |
| "learning_rate": 1.3219404292362065e-05, |
| "loss": 0.0172, |
| "step": 15500 |
| }, |
| { |
| "epoch": 10.71132596685083, |
| "grad_norm": 0.14117850363254547, |
| "learning_rate": 1.3163451307972751e-05, |
| "loss": 0.0184, |
| "step": 15510 |
| }, |
| { |
| "epoch": 10.718232044198896, |
| "grad_norm": 0.12379027158021927, |
| "learning_rate": 1.3107599033300977e-05, |
| "loss": 0.0191, |
| "step": 15520 |
| }, |
| { |
| "epoch": 10.725138121546962, |
| "grad_norm": 0.19423779845237732, |
| "learning_rate": 1.305184762104471e-05, |
| "loss": 0.0259, |
| "step": 15530 |
| }, |
| { |
| "epoch": 10.732044198895027, |
| "grad_norm": 0.14055989682674408, |
| "learning_rate": 1.2996197223626178e-05, |
| "loss": 0.0175, |
| "step": 15540 |
| }, |
| { |
| "epoch": 10.738950276243093, |
| "grad_norm": 0.11190267652273178, |
| "learning_rate": 1.2940647993191457e-05, |
| "loss": 0.019, |
| "step": 15550 |
| }, |
| { |
| "epoch": 10.74585635359116, |
| "grad_norm": 0.14761070907115936, |
| "learning_rate": 1.2885200081610005e-05, |
| "loss": 0.0192, |
| "step": 15560 |
| }, |
| { |
| "epoch": 10.752762430939226, |
| "grad_norm": 0.136946901679039, |
| "learning_rate": 1.2829853640474316e-05, |
| "loss": 0.0189, |
| "step": 15570 |
| }, |
| { |
| "epoch": 10.759668508287293, |
| "grad_norm": 0.13317197561264038, |
| "learning_rate": 1.2774608821099438e-05, |
| "loss": 0.0192, |
| "step": 15580 |
| }, |
| { |
| "epoch": 10.76657458563536, |
| "grad_norm": 0.1472024768590927, |
| "learning_rate": 1.2719465774522577e-05, |
| "loss": 0.0206, |
| "step": 15590 |
| }, |
| { |
| "epoch": 10.773480662983426, |
| "grad_norm": 0.15507635474205017, |
| "learning_rate": 1.2664424651502755e-05, |
| "loss": 0.0185, |
| "step": 15600 |
| }, |
| { |
| "epoch": 10.780386740331492, |
| "grad_norm": 0.1318316012620926, |
| "learning_rate": 1.260948560252026e-05, |
| "loss": 0.0171, |
| "step": 15610 |
| }, |
| { |
| "epoch": 10.787292817679559, |
| "grad_norm": 0.11615798622369766, |
| "learning_rate": 1.2554648777776396e-05, |
| "loss": 0.0209, |
| "step": 15620 |
| }, |
| { |
| "epoch": 10.794198895027625, |
| "grad_norm": 0.16774071753025055, |
| "learning_rate": 1.2499914327192919e-05, |
| "loss": 0.0166, |
| "step": 15630 |
| }, |
| { |
| "epoch": 10.80110497237569, |
| "grad_norm": 0.21441291272640228, |
| "learning_rate": 1.2445282400411722e-05, |
| "loss": 0.0197, |
| "step": 15640 |
| }, |
| { |
| "epoch": 10.808011049723756, |
| "grad_norm": 0.11902355402708054, |
| "learning_rate": 1.2390753146794437e-05, |
| "loss": 0.0225, |
| "step": 15650 |
| }, |
| { |
| "epoch": 10.814917127071823, |
| "grad_norm": 0.14514003694057465, |
| "learning_rate": 1.2336326715421925e-05, |
| "loss": 0.0187, |
| "step": 15660 |
| }, |
| { |
| "epoch": 10.82182320441989, |
| "grad_norm": 0.1624397486448288, |
| "learning_rate": 1.2282003255094005e-05, |
| "loss": 0.0223, |
| "step": 15670 |
| }, |
| { |
| "epoch": 10.828729281767956, |
| "grad_norm": 0.12542851269245148, |
| "learning_rate": 1.2227782914328928e-05, |
| "loss": 0.0193, |
| "step": 15680 |
| }, |
| { |
| "epoch": 10.835635359116022, |
| "grad_norm": 0.13802413642406464, |
| "learning_rate": 1.2173665841363018e-05, |
| "loss": 0.0179, |
| "step": 15690 |
| }, |
| { |
| "epoch": 10.842541436464089, |
| "grad_norm": 0.0999394953250885, |
| "learning_rate": 1.211965218415032e-05, |
| "loss": 0.0182, |
| "step": 15700 |
| }, |
| { |
| "epoch": 10.849447513812155, |
| "grad_norm": 0.1239611878991127, |
| "learning_rate": 1.2065742090362082e-05, |
| "loss": 0.022, |
| "step": 15710 |
| }, |
| { |
| "epoch": 10.856353591160222, |
| "grad_norm": 0.11930327117443085, |
| "learning_rate": 1.2011935707386457e-05, |
| "loss": 0.017, |
| "step": 15720 |
| }, |
| { |
| "epoch": 10.863259668508288, |
| "grad_norm": 0.12612630426883698, |
| "learning_rate": 1.1958233182328044e-05, |
| "loss": 0.0177, |
| "step": 15730 |
| }, |
| { |
| "epoch": 10.870165745856355, |
| "grad_norm": 0.16491909325122833, |
| "learning_rate": 1.1904634662007474e-05, |
| "loss": 0.0216, |
| "step": 15740 |
| }, |
| { |
| "epoch": 10.87707182320442, |
| "grad_norm": 0.141819030046463, |
| "learning_rate": 1.1851140292961088e-05, |
| "loss": 0.0179, |
| "step": 15750 |
| }, |
| { |
| "epoch": 10.883977900552486, |
| "grad_norm": 0.12252664566040039, |
| "learning_rate": 1.1797750221440424e-05, |
| "loss": 0.0189, |
| "step": 15760 |
| }, |
| { |
| "epoch": 10.890883977900552, |
| "grad_norm": 0.1347576379776001, |
| "learning_rate": 1.1744464593411897e-05, |
| "loss": 0.0229, |
| "step": 15770 |
| }, |
| { |
| "epoch": 10.897790055248619, |
| "grad_norm": 0.0848279520869255, |
| "learning_rate": 1.1691283554556399e-05, |
| "loss": 0.0179, |
| "step": 15780 |
| }, |
| { |
| "epoch": 10.904696132596685, |
| "grad_norm": 0.15215791761875153, |
| "learning_rate": 1.1638207250268834e-05, |
| "loss": 0.0205, |
| "step": 15790 |
| }, |
| { |
| "epoch": 10.911602209944752, |
| "grad_norm": 0.1396626979112625, |
| "learning_rate": 1.158523582565782e-05, |
| "loss": 0.0206, |
| "step": 15800 |
| }, |
| { |
| "epoch": 10.918508287292818, |
| "grad_norm": 0.1636439710855484, |
| "learning_rate": 1.1532369425545192e-05, |
| "loss": 0.0202, |
| "step": 15810 |
| }, |
| { |
| "epoch": 10.925414364640885, |
| "grad_norm": 0.13826753199100494, |
| "learning_rate": 1.1479608194465662e-05, |
| "loss": 0.0223, |
| "step": 15820 |
| }, |
| { |
| "epoch": 10.932320441988951, |
| "grad_norm": 0.14061783254146576, |
| "learning_rate": 1.1426952276666442e-05, |
| "loss": 0.0193, |
| "step": 15830 |
| }, |
| { |
| "epoch": 10.939226519337016, |
| "grad_norm": 0.08424236625432968, |
| "learning_rate": 1.1374401816106778e-05, |
| "loss": 0.0205, |
| "step": 15840 |
| }, |
| { |
| "epoch": 10.946132596685082, |
| "grad_norm": 0.1465301215648651, |
| "learning_rate": 1.1321956956457646e-05, |
| "loss": 0.0242, |
| "step": 15850 |
| }, |
| { |
| "epoch": 10.953038674033149, |
| "grad_norm": 0.10818585008382797, |
| "learning_rate": 1.1269617841101277e-05, |
| "loss": 0.0158, |
| "step": 15860 |
| }, |
| { |
| "epoch": 10.959944751381215, |
| "grad_norm": 0.11729062348604202, |
| "learning_rate": 1.1217384613130804e-05, |
| "loss": 0.0194, |
| "step": 15870 |
| }, |
| { |
| "epoch": 10.966850828729282, |
| "grad_norm": 0.13734790682792664, |
| "learning_rate": 1.11652574153499e-05, |
| "loss": 0.0211, |
| "step": 15880 |
| }, |
| { |
| "epoch": 10.973756906077348, |
| "grad_norm": 0.10207629203796387, |
| "learning_rate": 1.1113236390272303e-05, |
| "loss": 0.0191, |
| "step": 15890 |
| }, |
| { |
| "epoch": 10.980662983425415, |
| "grad_norm": 0.09973394870758057, |
| "learning_rate": 1.106132168012155e-05, |
| "loss": 0.0218, |
| "step": 15900 |
| }, |
| { |
| "epoch": 10.987569060773481, |
| "grad_norm": 0.178752601146698, |
| "learning_rate": 1.1009513426830448e-05, |
| "loss": 0.0224, |
| "step": 15910 |
| }, |
| { |
| "epoch": 10.994475138121548, |
| "grad_norm": 0.1685081571340561, |
| "learning_rate": 1.0957811772040777e-05, |
| "loss": 0.0186, |
| "step": 15920 |
| }, |
| { |
| "epoch": 11.001381215469614, |
| "grad_norm": 0.17715397477149963, |
| "learning_rate": 1.0906216857102913e-05, |
| "loss": 0.0268, |
| "step": 15930 |
| }, |
| { |
| "epoch": 11.008287292817679, |
| "grad_norm": 0.1387261301279068, |
| "learning_rate": 1.0854728823075355e-05, |
| "loss": 0.017, |
| "step": 15940 |
| }, |
| { |
| "epoch": 11.015193370165745, |
| "grad_norm": 0.15848971903324127, |
| "learning_rate": 1.0803347810724452e-05, |
| "loss": 0.0198, |
| "step": 15950 |
| }, |
| { |
| "epoch": 11.022099447513812, |
| "grad_norm": 0.16126196086406708, |
| "learning_rate": 1.0752073960523911e-05, |
| "loss": 0.0239, |
| "step": 15960 |
| }, |
| { |
| "epoch": 11.029005524861878, |
| "grad_norm": 0.1097811833024025, |
| "learning_rate": 1.070090741265447e-05, |
| "loss": 0.019, |
| "step": 15970 |
| }, |
| { |
| "epoch": 11.035911602209945, |
| "grad_norm": 0.10773305594921112, |
| "learning_rate": 1.0649848307003547e-05, |
| "loss": 0.0189, |
| "step": 15980 |
| }, |
| { |
| "epoch": 11.042817679558011, |
| "grad_norm": 0.13943029940128326, |
| "learning_rate": 1.0598896783164757e-05, |
| "loss": 0.0205, |
| "step": 15990 |
| }, |
| { |
| "epoch": 11.049723756906078, |
| "grad_norm": 0.16361400485038757, |
| "learning_rate": 1.0548052980437645e-05, |
| "loss": 0.0186, |
| "step": 16000 |
| }, |
| { |
| "epoch": 11.056629834254144, |
| "grad_norm": 0.18143020570278168, |
| "learning_rate": 1.049731703782722e-05, |
| "loss": 0.0164, |
| "step": 16010 |
| }, |
| { |
| "epoch": 11.06353591160221, |
| "grad_norm": 0.11211825162172318, |
| "learning_rate": 1.0446689094043587e-05, |
| "loss": 0.0148, |
| "step": 16020 |
| }, |
| { |
| "epoch": 11.070441988950277, |
| "grad_norm": 0.11881590634584427, |
| "learning_rate": 1.039616928750165e-05, |
| "loss": 0.0184, |
| "step": 16030 |
| }, |
| { |
| "epoch": 11.077348066298342, |
| "grad_norm": 0.19520686566829681, |
| "learning_rate": 1.0345757756320612e-05, |
| "loss": 0.0205, |
| "step": 16040 |
| }, |
| { |
| "epoch": 11.084254143646408, |
| "grad_norm": 0.1139756515622139, |
| "learning_rate": 1.0295454638323666e-05, |
| "loss": 0.019, |
| "step": 16050 |
| }, |
| { |
| "epoch": 11.091160220994475, |
| "grad_norm": 0.15939398109912872, |
| "learning_rate": 1.0245260071037632e-05, |
| "loss": 0.0199, |
| "step": 16060 |
| }, |
| { |
| "epoch": 11.098066298342541, |
| "grad_norm": 0.12053225189447403, |
| "learning_rate": 1.0195174191692518e-05, |
| "loss": 0.016, |
| "step": 16070 |
| }, |
| { |
| "epoch": 11.104972375690608, |
| "grad_norm": 0.1569368690252304, |
| "learning_rate": 1.014519713722124e-05, |
| "loss": 0.0212, |
| "step": 16080 |
| }, |
| { |
| "epoch": 11.111878453038674, |
| "grad_norm": 0.12836386263370514, |
| "learning_rate": 1.0095329044259132e-05, |
| "loss": 0.0215, |
| "step": 16090 |
| }, |
| { |
| "epoch": 11.11878453038674, |
| "grad_norm": 0.23310133814811707, |
| "learning_rate": 1.004557004914365e-05, |
| "loss": 0.0205, |
| "step": 16100 |
| }, |
| { |
| "epoch": 11.125690607734807, |
| "grad_norm": 0.1868610829114914, |
| "learning_rate": 9.995920287914007e-06, |
| "loss": 0.0196, |
| "step": 16110 |
| }, |
| { |
| "epoch": 11.132596685082873, |
| "grad_norm": 0.17721503973007202, |
| "learning_rate": 9.946379896310737e-06, |
| "loss": 0.0188, |
| "step": 16120 |
| }, |
| { |
| "epoch": 11.13950276243094, |
| "grad_norm": 0.14387953281402588, |
| "learning_rate": 9.896949009775396e-06, |
| "loss": 0.0185, |
| "step": 16130 |
| }, |
| { |
| "epoch": 11.146408839779005, |
| "grad_norm": 0.12724824249744415, |
| "learning_rate": 9.847627763450134e-06, |
| "loss": 0.0178, |
| "step": 16140 |
| }, |
| { |
| "epoch": 11.153314917127071, |
| "grad_norm": 0.13191691040992737, |
| "learning_rate": 9.798416292177337e-06, |
| "loss": 0.0178, |
| "step": 16150 |
| }, |
| { |
| "epoch": 11.160220994475138, |
| "grad_norm": 0.1462077796459198, |
| "learning_rate": 9.74931473049932e-06, |
| "loss": 0.0232, |
| "step": 16160 |
| }, |
| { |
| "epoch": 11.167127071823204, |
| "grad_norm": 0.12221027910709381, |
| "learning_rate": 9.700323212657847e-06, |
| "loss": 0.0185, |
| "step": 16170 |
| }, |
| { |
| "epoch": 11.17403314917127, |
| "grad_norm": 0.12442642450332642, |
| "learning_rate": 9.65144187259388e-06, |
| "loss": 0.0228, |
| "step": 16180 |
| }, |
| { |
| "epoch": 11.180939226519337, |
| "grad_norm": 0.16251784563064575, |
| "learning_rate": 9.602670843947132e-06, |
| "loss": 0.0192, |
| "step": 16190 |
| }, |
| { |
| "epoch": 11.187845303867404, |
| "grad_norm": 0.16168147325515747, |
| "learning_rate": 9.554010260055713e-06, |
| "loss": 0.0166, |
| "step": 16200 |
| }, |
| { |
| "epoch": 11.19475138121547, |
| "grad_norm": 0.13379383087158203, |
| "learning_rate": 9.505460253955834e-06, |
| "loss": 0.0158, |
| "step": 16210 |
| }, |
| { |
| "epoch": 11.201657458563536, |
| "grad_norm": 0.15277086198329926, |
| "learning_rate": 9.457020958381324e-06, |
| "loss": 0.018, |
| "step": 16220 |
| }, |
| { |
| "epoch": 11.208563535911603, |
| "grad_norm": 0.16304939985275269, |
| "learning_rate": 9.408692505763395e-06, |
| "loss": 0.0184, |
| "step": 16230 |
| }, |
| { |
| "epoch": 11.215469613259668, |
| "grad_norm": 0.16197776794433594, |
| "learning_rate": 9.360475028230181e-06, |
| "loss": 0.0188, |
| "step": 16240 |
| }, |
| { |
| "epoch": 11.222375690607734, |
| "grad_norm": 0.12607796490192413, |
| "learning_rate": 9.312368657606412e-06, |
| "loss": 0.0198, |
| "step": 16250 |
| }, |
| { |
| "epoch": 11.2292817679558, |
| "grad_norm": 0.14476558566093445, |
| "learning_rate": 9.264373525413096e-06, |
| "loss": 0.0201, |
| "step": 16260 |
| }, |
| { |
| "epoch": 11.236187845303867, |
| "grad_norm": 0.13373053073883057, |
| "learning_rate": 9.216489762867058e-06, |
| "loss": 0.0225, |
| "step": 16270 |
| }, |
| { |
| "epoch": 11.243093922651934, |
| "grad_norm": 0.10646121948957443, |
| "learning_rate": 9.168717500880708e-06, |
| "loss": 0.0126, |
| "step": 16280 |
| }, |
| { |
| "epoch": 11.25, |
| "grad_norm": 0.10445374995470047, |
| "learning_rate": 9.121056870061574e-06, |
| "loss": 0.0242, |
| "step": 16290 |
| }, |
| { |
| "epoch": 11.256906077348066, |
| "grad_norm": 0.13283097743988037, |
| "learning_rate": 9.073508000711983e-06, |
| "loss": 0.0163, |
| "step": 16300 |
| }, |
| { |
| "epoch": 11.263812154696133, |
| "grad_norm": 0.15861085057258606, |
| "learning_rate": 9.026071022828758e-06, |
| "loss": 0.0208, |
| "step": 16310 |
| }, |
| { |
| "epoch": 11.2707182320442, |
| "grad_norm": 0.14492949843406677, |
| "learning_rate": 8.978746066102771e-06, |
| "loss": 0.0201, |
| "step": 16320 |
| }, |
| { |
| "epoch": 11.277624309392266, |
| "grad_norm": 0.16569550335407257, |
| "learning_rate": 8.931533259918634e-06, |
| "loss": 0.0172, |
| "step": 16330 |
| }, |
| { |
| "epoch": 11.284530386740332, |
| "grad_norm": 0.11864589899778366, |
| "learning_rate": 8.884432733354382e-06, |
| "loss": 0.021, |
| "step": 16340 |
| }, |
| { |
| "epoch": 11.291436464088397, |
| "grad_norm": 0.13216151297092438, |
| "learning_rate": 8.837444615181029e-06, |
| "loss": 0.0171, |
| "step": 16350 |
| }, |
| { |
| "epoch": 11.298342541436464, |
| "grad_norm": 0.12307021021842957, |
| "learning_rate": 8.790569033862323e-06, |
| "loss": 0.0203, |
| "step": 16360 |
| }, |
| { |
| "epoch": 11.30524861878453, |
| "grad_norm": 0.09344703704118729, |
| "learning_rate": 8.7438061175543e-06, |
| "loss": 0.0186, |
| "step": 16370 |
| }, |
| { |
| "epoch": 11.312154696132596, |
| "grad_norm": 0.12735137343406677, |
| "learning_rate": 8.697155994104978e-06, |
| "loss": 0.0203, |
| "step": 16380 |
| }, |
| { |
| "epoch": 11.319060773480663, |
| "grad_norm": 0.12188874930143356, |
| "learning_rate": 8.650618791054033e-06, |
| "loss": 0.0208, |
| "step": 16390 |
| }, |
| { |
| "epoch": 11.32596685082873, |
| "grad_norm": 0.12098925560712814, |
| "learning_rate": 8.604194635632373e-06, |
| "loss": 0.0204, |
| "step": 16400 |
| }, |
| { |
| "epoch": 11.332872928176796, |
| "grad_norm": 0.13109688460826874, |
| "learning_rate": 8.557883654761906e-06, |
| "loss": 0.0196, |
| "step": 16410 |
| }, |
| { |
| "epoch": 11.339779005524862, |
| "grad_norm": 0.1311589777469635, |
| "learning_rate": 8.511685975055061e-06, |
| "loss": 0.0162, |
| "step": 16420 |
| }, |
| { |
| "epoch": 11.346685082872929, |
| "grad_norm": 0.09084175527095795, |
| "learning_rate": 8.46560172281452e-06, |
| "loss": 0.0147, |
| "step": 16430 |
| }, |
| { |
| "epoch": 11.353591160220994, |
| "grad_norm": 0.12907159328460693, |
| "learning_rate": 8.419631024032893e-06, |
| "loss": 0.0167, |
| "step": 16440 |
| }, |
| { |
| "epoch": 11.36049723756906, |
| "grad_norm": 0.10988034307956696, |
| "learning_rate": 8.373774004392293e-06, |
| "loss": 0.0174, |
| "step": 16450 |
| }, |
| { |
| "epoch": 11.367403314917127, |
| "grad_norm": 0.1319379210472107, |
| "learning_rate": 8.32803078926409e-06, |
| "loss": 0.0158, |
| "step": 16460 |
| }, |
| { |
| "epoch": 11.374309392265193, |
| "grad_norm": 0.11994582414627075, |
| "learning_rate": 8.282401503708454e-06, |
| "loss": 0.0187, |
| "step": 16470 |
| }, |
| { |
| "epoch": 11.38121546961326, |
| "grad_norm": 0.14029355347156525, |
| "learning_rate": 8.23688627247412e-06, |
| "loss": 0.0216, |
| "step": 16480 |
| }, |
| { |
| "epoch": 11.388121546961326, |
| "grad_norm": 0.16516615450382233, |
| "learning_rate": 8.191485219998007e-06, |
| "loss": 0.021, |
| "step": 16490 |
| }, |
| { |
| "epoch": 11.395027624309392, |
| "grad_norm": 0.1420423984527588, |
| "learning_rate": 8.146198470404843e-06, |
| "loss": 0.0181, |
| "step": 16500 |
| }, |
| { |
| "epoch": 11.401933701657459, |
| "grad_norm": 0.19425787031650543, |
| "learning_rate": 8.101026147506897e-06, |
| "loss": 0.0165, |
| "step": 16510 |
| }, |
| { |
| "epoch": 11.408839779005525, |
| "grad_norm": 0.10360033065080643, |
| "learning_rate": 8.05596837480353e-06, |
| "loss": 0.0187, |
| "step": 16520 |
| }, |
| { |
| "epoch": 11.415745856353592, |
| "grad_norm": 0.06478369981050491, |
| "learning_rate": 8.011025275480998e-06, |
| "loss": 0.0188, |
| "step": 16530 |
| }, |
| { |
| "epoch": 11.422651933701658, |
| "grad_norm": 0.170049786567688, |
| "learning_rate": 7.966196972412027e-06, |
| "loss": 0.0169, |
| "step": 16540 |
| }, |
| { |
| "epoch": 11.429558011049723, |
| "grad_norm": 0.11986824870109558, |
| "learning_rate": 7.92148358815547e-06, |
| "loss": 0.0199, |
| "step": 16550 |
| }, |
| { |
| "epoch": 11.43646408839779, |
| "grad_norm": 0.29806897044181824, |
| "learning_rate": 7.87688524495604e-06, |
| "loss": 0.0188, |
| "step": 16560 |
| }, |
| { |
| "epoch": 11.443370165745856, |
| "grad_norm": 0.1958770602941513, |
| "learning_rate": 7.83240206474386e-06, |
| "loss": 0.0198, |
| "step": 16570 |
| }, |
| { |
| "epoch": 11.450276243093922, |
| "grad_norm": 0.13644862174987793, |
| "learning_rate": 7.788034169134272e-06, |
| "loss": 0.0212, |
| "step": 16580 |
| }, |
| { |
| "epoch": 11.457182320441989, |
| "grad_norm": 0.11058889329433441, |
| "learning_rate": 7.743781679427414e-06, |
| "loss": 0.0193, |
| "step": 16590 |
| }, |
| { |
| "epoch": 11.464088397790055, |
| "grad_norm": 0.09904930740594864, |
| "learning_rate": 7.699644716607895e-06, |
| "loss": 0.0179, |
| "step": 16600 |
| }, |
| { |
| "epoch": 11.470994475138122, |
| "grad_norm": 0.09778125584125519, |
| "learning_rate": 7.655623401344486e-06, |
| "loss": 0.0195, |
| "step": 16610 |
| }, |
| { |
| "epoch": 11.477900552486188, |
| "grad_norm": 0.1029466763138771, |
| "learning_rate": 7.611717853989775e-06, |
| "loss": 0.018, |
| "step": 16620 |
| }, |
| { |
| "epoch": 11.484806629834255, |
| "grad_norm": 0.08880892395973206, |
| "learning_rate": 7.567928194579854e-06, |
| "loss": 0.0184, |
| "step": 16630 |
| }, |
| { |
| "epoch": 11.491712707182321, |
| "grad_norm": 0.11798331141471863, |
| "learning_rate": 7.524254542833997e-06, |
| "loss": 0.0174, |
| "step": 16640 |
| }, |
| { |
| "epoch": 11.498618784530386, |
| "grad_norm": 0.19138731062412262, |
| "learning_rate": 7.480697018154286e-06, |
| "loss": 0.0208, |
| "step": 16650 |
| }, |
| { |
| "epoch": 11.505524861878452, |
| "grad_norm": 0.11023187637329102, |
| "learning_rate": 7.437255739625332e-06, |
| "loss": 0.0205, |
| "step": 16660 |
| }, |
| { |
| "epoch": 11.512430939226519, |
| "grad_norm": 0.13642142713069916, |
| "learning_rate": 7.393930826013923e-06, |
| "loss": 0.0174, |
| "step": 16670 |
| }, |
| { |
| "epoch": 11.519337016574585, |
| "grad_norm": 0.1239386722445488, |
| "learning_rate": 7.350722395768722e-06, |
| "loss": 0.019, |
| "step": 16680 |
| }, |
| { |
| "epoch": 11.526243093922652, |
| "grad_norm": 0.13160714507102966, |
| "learning_rate": 7.307630567019963e-06, |
| "loss": 0.017, |
| "step": 16690 |
| }, |
| { |
| "epoch": 11.533149171270718, |
| "grad_norm": 0.1242450550198555, |
| "learning_rate": 7.264655457579e-06, |
| "loss": 0.0186, |
| "step": 16700 |
| }, |
| { |
| "epoch": 11.540055248618785, |
| "grad_norm": 0.12114911526441574, |
| "learning_rate": 7.221797184938184e-06, |
| "loss": 0.0228, |
| "step": 16710 |
| }, |
| { |
| "epoch": 11.546961325966851, |
| "grad_norm": 0.1373281180858612, |
| "learning_rate": 7.179055866270373e-06, |
| "loss": 0.0142, |
| "step": 16720 |
| }, |
| { |
| "epoch": 11.553867403314918, |
| "grad_norm": 0.1476965695619583, |
| "learning_rate": 7.136431618428707e-06, |
| "loss": 0.0238, |
| "step": 16730 |
| }, |
| { |
| "epoch": 11.560773480662984, |
| "grad_norm": 0.15891554951667786, |
| "learning_rate": 7.09392455794628e-06, |
| "loss": 0.0149, |
| "step": 16740 |
| }, |
| { |
| "epoch": 11.567679558011049, |
| "grad_norm": 0.11660894006490707, |
| "learning_rate": 7.051534801035725e-06, |
| "loss": 0.0148, |
| "step": 16750 |
| }, |
| { |
| "epoch": 11.574585635359115, |
| "grad_norm": 0.14604243636131287, |
| "learning_rate": 7.00926246358905e-06, |
| "loss": 0.0178, |
| "step": 16760 |
| }, |
| { |
| "epoch": 11.581491712707182, |
| "grad_norm": 0.11174402385950089, |
| "learning_rate": 6.967107661177191e-06, |
| "loss": 0.022, |
| "step": 16770 |
| }, |
| { |
| "epoch": 11.588397790055248, |
| "grad_norm": 0.18665166199207306, |
| "learning_rate": 6.925070509049786e-06, |
| "loss": 0.0176, |
| "step": 16780 |
| }, |
| { |
| "epoch": 11.595303867403315, |
| "grad_norm": 0.16662661731243134, |
| "learning_rate": 6.883151122134812e-06, |
| "loss": 0.0211, |
| "step": 16790 |
| }, |
| { |
| "epoch": 11.602209944751381, |
| "grad_norm": 0.13698987662792206, |
| "learning_rate": 6.8413496150382394e-06, |
| "loss": 0.0186, |
| "step": 16800 |
| }, |
| { |
| "epoch": 11.609116022099448, |
| "grad_norm": 0.14518332481384277, |
| "learning_rate": 6.7996661020438165e-06, |
| "loss": 0.019, |
| "step": 16810 |
| }, |
| { |
| "epoch": 11.616022099447514, |
| "grad_norm": 0.12339243292808533, |
| "learning_rate": 6.758100697112662e-06, |
| "loss": 0.0187, |
| "step": 16820 |
| }, |
| { |
| "epoch": 11.62292817679558, |
| "grad_norm": 0.16226792335510254, |
| "learning_rate": 6.716653513883026e-06, |
| "loss": 0.0182, |
| "step": 16830 |
| }, |
| { |
| "epoch": 11.629834254143645, |
| "grad_norm": 0.11799878627061844, |
| "learning_rate": 6.675324665669913e-06, |
| "loss": 0.0193, |
| "step": 16840 |
| }, |
| { |
| "epoch": 11.636740331491712, |
| "grad_norm": 0.11323769390583038, |
| "learning_rate": 6.634114265464803e-06, |
| "loss": 0.0205, |
| "step": 16850 |
| }, |
| { |
| "epoch": 11.643646408839778, |
| "grad_norm": 0.15336430072784424, |
| "learning_rate": 6.59302242593538e-06, |
| "loss": 0.018, |
| "step": 16860 |
| }, |
| { |
| "epoch": 11.650552486187845, |
| "grad_norm": 0.13874183595180511, |
| "learning_rate": 6.552049259425141e-06, |
| "loss": 0.0149, |
| "step": 16870 |
| }, |
| { |
| "epoch": 11.657458563535911, |
| "grad_norm": 0.12732809782028198, |
| "learning_rate": 6.511194877953181e-06, |
| "loss": 0.0209, |
| "step": 16880 |
| }, |
| { |
| "epoch": 11.664364640883978, |
| "grad_norm": 0.08383426815271378, |
| "learning_rate": 6.470459393213813e-06, |
| "loss": 0.0184, |
| "step": 16890 |
| }, |
| { |
| "epoch": 11.671270718232044, |
| "grad_norm": 0.10465892404317856, |
| "learning_rate": 6.429842916576279e-06, |
| "loss": 0.0255, |
| "step": 16900 |
| }, |
| { |
| "epoch": 11.67817679558011, |
| "grad_norm": 0.1163114532828331, |
| "learning_rate": 6.389345559084503e-06, |
| "loss": 0.0213, |
| "step": 16910 |
| }, |
| { |
| "epoch": 11.685082872928177, |
| "grad_norm": 0.13213935494422913, |
| "learning_rate": 6.348967431456682e-06, |
| "loss": 0.0184, |
| "step": 16920 |
| }, |
| { |
| "epoch": 11.691988950276244, |
| "grad_norm": 0.1505950689315796, |
| "learning_rate": 6.30870864408511e-06, |
| "loss": 0.0174, |
| "step": 16930 |
| }, |
| { |
| "epoch": 11.69889502762431, |
| "grad_norm": 0.18978534638881683, |
| "learning_rate": 6.268569307035754e-06, |
| "loss": 0.0193, |
| "step": 16940 |
| }, |
| { |
| "epoch": 11.705801104972375, |
| "grad_norm": 0.15616071224212646, |
| "learning_rate": 6.228549530048022e-06, |
| "loss": 0.0186, |
| "step": 16950 |
| }, |
| { |
| "epoch": 11.712707182320441, |
| "grad_norm": 0.12579162418842316, |
| "learning_rate": 6.1886494225344814e-06, |
| "loss": 0.0192, |
| "step": 16960 |
| }, |
| { |
| "epoch": 11.719613259668508, |
| "grad_norm": 0.11010239273309708, |
| "learning_rate": 6.148869093580479e-06, |
| "loss": 0.0213, |
| "step": 16970 |
| }, |
| { |
| "epoch": 11.726519337016574, |
| "grad_norm": 0.11935733258724213, |
| "learning_rate": 6.109208651943921e-06, |
| "loss": 0.0162, |
| "step": 16980 |
| }, |
| { |
| "epoch": 11.73342541436464, |
| "grad_norm": 0.1642450988292694, |
| "learning_rate": 6.069668206054946e-06, |
| "loss": 0.0209, |
| "step": 16990 |
| }, |
| { |
| "epoch": 11.740331491712707, |
| "grad_norm": 0.20178638398647308, |
| "learning_rate": 6.0302478640156145e-06, |
| "loss": 0.0217, |
| "step": 17000 |
| }, |
| { |
| "epoch": 11.747237569060774, |
| "grad_norm": 0.16644486784934998, |
| "learning_rate": 5.990947733599644e-06, |
| "loss": 0.0204, |
| "step": 17010 |
| }, |
| { |
| "epoch": 11.75414364640884, |
| "grad_norm": 0.15117019414901733, |
| "learning_rate": 5.951767922252105e-06, |
| "loss": 0.0184, |
| "step": 17020 |
| }, |
| { |
| "epoch": 11.761049723756907, |
| "grad_norm": 0.07056622207164764, |
| "learning_rate": 5.912708537089068e-06, |
| "loss": 0.0201, |
| "step": 17030 |
| }, |
| { |
| "epoch": 11.767955801104973, |
| "grad_norm": 0.10951358079910278, |
| "learning_rate": 5.873769684897434e-06, |
| "loss": 0.0167, |
| "step": 17040 |
| }, |
| { |
| "epoch": 11.774861878453038, |
| "grad_norm": 0.13333486020565033, |
| "learning_rate": 5.834951472134514e-06, |
| "loss": 0.0174, |
| "step": 17050 |
| }, |
| { |
| "epoch": 11.781767955801104, |
| "grad_norm": 0.20654167234897614, |
| "learning_rate": 5.796254004927832e-06, |
| "loss": 0.0205, |
| "step": 17060 |
| }, |
| { |
| "epoch": 11.78867403314917, |
| "grad_norm": 0.19155727326869965, |
| "learning_rate": 5.757677389074806e-06, |
| "loss": 0.0158, |
| "step": 17070 |
| }, |
| { |
| "epoch": 11.795580110497237, |
| "grad_norm": 0.1749805510044098, |
| "learning_rate": 5.719221730042385e-06, |
| "loss": 0.0256, |
| "step": 17080 |
| }, |
| { |
| "epoch": 11.802486187845304, |
| "grad_norm": 0.07431744039058685, |
| "learning_rate": 5.680887132966911e-06, |
| "loss": 0.0166, |
| "step": 17090 |
| }, |
| { |
| "epoch": 11.80939226519337, |
| "grad_norm": 0.10040867328643799, |
| "learning_rate": 5.642673702653683e-06, |
| "loss": 0.0192, |
| "step": 17100 |
| }, |
| { |
| "epoch": 11.816298342541437, |
| "grad_norm": 0.14079715311527252, |
| "learning_rate": 5.604581543576781e-06, |
| "loss": 0.0182, |
| "step": 17110 |
| }, |
| { |
| "epoch": 11.823204419889503, |
| "grad_norm": 0.19855310022830963, |
| "learning_rate": 5.566610759878704e-06, |
| "loss": 0.0231, |
| "step": 17120 |
| }, |
| { |
| "epoch": 11.83011049723757, |
| "grad_norm": 0.17754405736923218, |
| "learning_rate": 5.528761455370119e-06, |
| "loss": 0.0214, |
| "step": 17130 |
| }, |
| { |
| "epoch": 11.837016574585636, |
| "grad_norm": 0.1382066309452057, |
| "learning_rate": 5.491033733529594e-06, |
| "loss": 0.0272, |
| "step": 17140 |
| }, |
| { |
| "epoch": 11.8439226519337, |
| "grad_norm": 0.16806116700172424, |
| "learning_rate": 5.453427697503255e-06, |
| "loss": 0.0166, |
| "step": 17150 |
| }, |
| { |
| "epoch": 11.850828729281767, |
| "grad_norm": 0.14684727787971497, |
| "learning_rate": 5.415943450104599e-06, |
| "loss": 0.0184, |
| "step": 17160 |
| }, |
| { |
| "epoch": 11.857734806629834, |
| "grad_norm": 0.1558583527803421, |
| "learning_rate": 5.378581093814111e-06, |
| "loss": 0.0196, |
| "step": 17170 |
| }, |
| { |
| "epoch": 11.8646408839779, |
| "grad_norm": 0.2262316346168518, |
| "learning_rate": 5.3413407307790375e-06, |
| "loss": 0.0165, |
| "step": 17180 |
| }, |
| { |
| "epoch": 11.871546961325967, |
| "grad_norm": 0.11336057633161545, |
| "learning_rate": 5.30422246281313e-06, |
| "loss": 0.02, |
| "step": 17190 |
| }, |
| { |
| "epoch": 11.878453038674033, |
| "grad_norm": 0.17291094362735748, |
| "learning_rate": 5.267226391396296e-06, |
| "loss": 0.0171, |
| "step": 17200 |
| }, |
| { |
| "epoch": 11.8853591160221, |
| "grad_norm": 0.25549980998039246, |
| "learning_rate": 5.2303526176744e-06, |
| "loss": 0.0236, |
| "step": 17210 |
| }, |
| { |
| "epoch": 11.892265193370166, |
| "grad_norm": 0.16734923422336578, |
| "learning_rate": 5.193601242458929e-06, |
| "loss": 0.0178, |
| "step": 17220 |
| }, |
| { |
| "epoch": 11.899171270718233, |
| "grad_norm": 0.11904910951852798, |
| "learning_rate": 5.156972366226714e-06, |
| "loss": 0.0167, |
| "step": 17230 |
| }, |
| { |
| "epoch": 11.9060773480663, |
| "grad_norm": 0.12467493116855621, |
| "learning_rate": 5.120466089119735e-06, |
| "loss": 0.0164, |
| "step": 17240 |
| }, |
| { |
| "epoch": 11.912983425414364, |
| "grad_norm": 0.08813518285751343, |
| "learning_rate": 5.084082510944749e-06, |
| "loss": 0.0249, |
| "step": 17250 |
| }, |
| { |
| "epoch": 11.91988950276243, |
| "grad_norm": 0.11498162150382996, |
| "learning_rate": 5.047821731173058e-06, |
| "loss": 0.0175, |
| "step": 17260 |
| }, |
| { |
| "epoch": 11.926795580110497, |
| "grad_norm": 0.14780119061470032, |
| "learning_rate": 5.011683848940274e-06, |
| "loss": 0.0188, |
| "step": 17270 |
| }, |
| { |
| "epoch": 11.933701657458563, |
| "grad_norm": 0.15292783081531525, |
| "learning_rate": 4.975668963045954e-06, |
| "loss": 0.0187, |
| "step": 17280 |
| }, |
| { |
| "epoch": 11.94060773480663, |
| "grad_norm": 0.1259051412343979, |
| "learning_rate": 4.9397771719534525e-06, |
| "loss": 0.0245, |
| "step": 17290 |
| }, |
| { |
| "epoch": 11.947513812154696, |
| "grad_norm": 0.11878091841936111, |
| "learning_rate": 4.904008573789548e-06, |
| "loss": 0.0193, |
| "step": 17300 |
| }, |
| { |
| "epoch": 11.954419889502763, |
| "grad_norm": 0.15138937532901764, |
| "learning_rate": 4.8683632663442005e-06, |
| "loss": 0.0217, |
| "step": 17310 |
| }, |
| { |
| "epoch": 11.96132596685083, |
| "grad_norm": 0.12729822099208832, |
| "learning_rate": 4.832841347070343e-06, |
| "loss": 0.0194, |
| "step": 17320 |
| }, |
| { |
| "epoch": 11.968232044198896, |
| "grad_norm": 0.1367802619934082, |
| "learning_rate": 4.797442913083539e-06, |
| "loss": 0.018, |
| "step": 17330 |
| }, |
| { |
| "epoch": 11.975138121546962, |
| "grad_norm": 0.11630649864673615, |
| "learning_rate": 4.7621680611617596e-06, |
| "loss": 0.022, |
| "step": 17340 |
| }, |
| { |
| "epoch": 11.982044198895027, |
| "grad_norm": 0.1627459079027176, |
| "learning_rate": 4.727016887745095e-06, |
| "loss": 0.0198, |
| "step": 17350 |
| }, |
| { |
| "epoch": 11.988950276243093, |
| "grad_norm": 0.18625329434871674, |
| "learning_rate": 4.691989488935511e-06, |
| "loss": 0.0216, |
| "step": 17360 |
| }, |
| { |
| "epoch": 11.99585635359116, |
| "grad_norm": 0.19761201739311218, |
| "learning_rate": 4.657085960496588e-06, |
| "loss": 0.0172, |
| "step": 17370 |
| }, |
| { |
| "epoch": 12.002762430939226, |
| "grad_norm": 0.14513880014419556, |
| "learning_rate": 4.6223063978532265e-06, |
| "loss": 0.0151, |
| "step": 17380 |
| }, |
| { |
| "epoch": 12.009668508287293, |
| "grad_norm": 0.07700514793395996, |
| "learning_rate": 4.587650896091439e-06, |
| "loss": 0.0158, |
| "step": 17390 |
| }, |
| { |
| "epoch": 12.01657458563536, |
| "grad_norm": 0.15232037007808685, |
| "learning_rate": 4.553119549958035e-06, |
| "loss": 0.0158, |
| "step": 17400 |
| }, |
| { |
| "epoch": 12.023480662983426, |
| "grad_norm": 0.12302271276712418, |
| "learning_rate": 4.518712453860385e-06, |
| "loss": 0.018, |
| "step": 17410 |
| }, |
| { |
| "epoch": 12.030386740331492, |
| "grad_norm": 0.1230573058128357, |
| "learning_rate": 4.484429701866205e-06, |
| "loss": 0.0207, |
| "step": 17420 |
| }, |
| { |
| "epoch": 12.037292817679559, |
| "grad_norm": 0.13379715383052826, |
| "learning_rate": 4.4502713877031975e-06, |
| "loss": 0.0214, |
| "step": 17430 |
| }, |
| { |
| "epoch": 12.044198895027625, |
| "grad_norm": 0.13079579174518585, |
| "learning_rate": 4.416237604758911e-06, |
| "loss": 0.0191, |
| "step": 17440 |
| }, |
| { |
| "epoch": 12.05110497237569, |
| "grad_norm": 0.11686630547046661, |
| "learning_rate": 4.3823284460804025e-06, |
| "loss": 0.0165, |
| "step": 17450 |
| }, |
| { |
| "epoch": 12.058011049723756, |
| "grad_norm": 0.1403651237487793, |
| "learning_rate": 4.348544004374011e-06, |
| "loss": 0.0179, |
| "step": 17460 |
| }, |
| { |
| "epoch": 12.064917127071823, |
| "grad_norm": 0.17115865647792816, |
| "learning_rate": 4.314884372005123e-06, |
| "loss": 0.0196, |
| "step": 17470 |
| }, |
| { |
| "epoch": 12.07182320441989, |
| "grad_norm": 0.13972045481204987, |
| "learning_rate": 4.281349640997867e-06, |
| "loss": 0.0206, |
| "step": 17480 |
| }, |
| { |
| "epoch": 12.078729281767956, |
| "grad_norm": 0.1649554967880249, |
| "learning_rate": 4.247939903034942e-06, |
| "loss": 0.0195, |
| "step": 17490 |
| }, |
| { |
| "epoch": 12.085635359116022, |
| "grad_norm": 0.11629771441221237, |
| "learning_rate": 4.214655249457284e-06, |
| "loss": 0.0152, |
| "step": 17500 |
| }, |
| { |
| "epoch": 12.092541436464089, |
| "grad_norm": 0.17439007759094238, |
| "learning_rate": 4.181495771263855e-06, |
| "loss": 0.0158, |
| "step": 17510 |
| }, |
| { |
| "epoch": 12.099447513812155, |
| "grad_norm": 0.1433987021446228, |
| "learning_rate": 4.148461559111427e-06, |
| "loss": 0.02, |
| "step": 17520 |
| }, |
| { |
| "epoch": 12.106353591160222, |
| "grad_norm": 0.11897672712802887, |
| "learning_rate": 4.115552703314252e-06, |
| "loss": 0.0172, |
| "step": 17530 |
| }, |
| { |
| "epoch": 12.113259668508288, |
| "grad_norm": 0.1429292857646942, |
| "learning_rate": 4.082769293843886e-06, |
| "loss": 0.026, |
| "step": 17540 |
| }, |
| { |
| "epoch": 12.120165745856353, |
| "grad_norm": 0.07817108929157257, |
| "learning_rate": 4.050111420328939e-06, |
| "loss": 0.0207, |
| "step": 17550 |
| }, |
| { |
| "epoch": 12.12707182320442, |
| "grad_norm": 0.18502286076545715, |
| "learning_rate": 4.017579172054764e-06, |
| "loss": 0.0176, |
| "step": 17560 |
| }, |
| { |
| "epoch": 12.133977900552486, |
| "grad_norm": 0.17949388921260834, |
| "learning_rate": 3.985172637963308e-06, |
| "loss": 0.0222, |
| "step": 17570 |
| }, |
| { |
| "epoch": 12.140883977900552, |
| "grad_norm": 0.10429384559392929, |
| "learning_rate": 3.952891906652784e-06, |
| "loss": 0.0187, |
| "step": 17580 |
| }, |
| { |
| "epoch": 12.147790055248619, |
| "grad_norm": 0.14530150592327118, |
| "learning_rate": 3.920737066377478e-06, |
| "loss": 0.0199, |
| "step": 17590 |
| }, |
| { |
| "epoch": 12.154696132596685, |
| "grad_norm": 0.13472408056259155, |
| "learning_rate": 3.888708205047509e-06, |
| "loss": 0.0146, |
| "step": 17600 |
| }, |
| { |
| "epoch": 12.161602209944752, |
| "grad_norm": 0.12338479608297348, |
| "learning_rate": 3.856805410228542e-06, |
| "loss": 0.014, |
| "step": 17610 |
| }, |
| { |
| "epoch": 12.168508287292818, |
| "grad_norm": 0.12579374015331268, |
| "learning_rate": 3.82502876914162e-06, |
| "loss": 0.0186, |
| "step": 17620 |
| }, |
| { |
| "epoch": 12.175414364640885, |
| "grad_norm": 0.12865792214870453, |
| "learning_rate": 3.7933783686628586e-06, |
| "loss": 0.0176, |
| "step": 17630 |
| }, |
| { |
| "epoch": 12.182320441988951, |
| "grad_norm": 0.1593572199344635, |
| "learning_rate": 3.7618542953232306e-06, |
| "loss": 0.0205, |
| "step": 17640 |
| }, |
| { |
| "epoch": 12.189226519337016, |
| "grad_norm": 0.15046411752700806, |
| "learning_rate": 3.7304566353083658e-06, |
| "loss": 0.0187, |
| "step": 17650 |
| }, |
| { |
| "epoch": 12.196132596685082, |
| "grad_norm": 0.14371070265769958, |
| "learning_rate": 3.6991854744582555e-06, |
| "loss": 0.018, |
| "step": 17660 |
| }, |
| { |
| "epoch": 12.203038674033149, |
| "grad_norm": 0.2144523411989212, |
| "learning_rate": 3.6680408982670777e-06, |
| "loss": 0.0231, |
| "step": 17670 |
| }, |
| { |
| "epoch": 12.209944751381215, |
| "grad_norm": 0.13164383172988892, |
| "learning_rate": 3.637022991882899e-06, |
| "loss": 0.0152, |
| "step": 17680 |
| }, |
| { |
| "epoch": 12.216850828729282, |
| "grad_norm": 0.1407146155834198, |
| "learning_rate": 3.606131840107485e-06, |
| "loss": 0.019, |
| "step": 17690 |
| }, |
| { |
| "epoch": 12.223756906077348, |
| "grad_norm": 0.23521016538143158, |
| "learning_rate": 3.575367527396084e-06, |
| "loss": 0.0237, |
| "step": 17700 |
| }, |
| { |
| "epoch": 12.230662983425415, |
| "grad_norm": 0.15884459018707275, |
| "learning_rate": 3.5447301378571386e-06, |
| "loss": 0.0195, |
| "step": 17710 |
| }, |
| { |
| "epoch": 12.237569060773481, |
| "grad_norm": 0.16665038466453552, |
| "learning_rate": 3.514219755252113e-06, |
| "loss": 0.0179, |
| "step": 17720 |
| }, |
| { |
| "epoch": 12.244475138121548, |
| "grad_norm": 0.1557607352733612, |
| "learning_rate": 3.4838364629952213e-06, |
| "loss": 0.0166, |
| "step": 17730 |
| }, |
| { |
| "epoch": 12.251381215469614, |
| "grad_norm": 0.1939476877450943, |
| "learning_rate": 3.4535803441532123e-06, |
| "loss": 0.0215, |
| "step": 17740 |
| }, |
| { |
| "epoch": 12.258287292817679, |
| "grad_norm": 0.07367555052042007, |
| "learning_rate": 3.4234514814451836e-06, |
| "loss": 0.0182, |
| "step": 17750 |
| }, |
| { |
| "epoch": 12.265193370165745, |
| "grad_norm": 0.14954784512519836, |
| "learning_rate": 3.393449957242273e-06, |
| "loss": 0.0168, |
| "step": 17760 |
| }, |
| { |
| "epoch": 12.272099447513812, |
| "grad_norm": 0.11408301442861557, |
| "learning_rate": 3.363575853567524e-06, |
| "loss": 0.0234, |
| "step": 17770 |
| }, |
| { |
| "epoch": 12.279005524861878, |
| "grad_norm": 0.1830197125673294, |
| "learning_rate": 3.3338292520955826e-06, |
| "loss": 0.016, |
| "step": 17780 |
| }, |
| { |
| "epoch": 12.285911602209945, |
| "grad_norm": 0.12380652874708176, |
| "learning_rate": 3.304210234152516e-06, |
| "loss": 0.0213, |
| "step": 17790 |
| }, |
| { |
| "epoch": 12.292817679558011, |
| "grad_norm": 0.13707053661346436, |
| "learning_rate": 3.2747188807155993e-06, |
| "loss": 0.0191, |
| "step": 17800 |
| }, |
| { |
| "epoch": 12.299723756906078, |
| "grad_norm": 0.1347280889749527, |
| "learning_rate": 3.2453552724130643e-06, |
| "loss": 0.022, |
| "step": 17810 |
| }, |
| { |
| "epoch": 12.306629834254144, |
| "grad_norm": 0.13858382403850555, |
| "learning_rate": 3.216119489523889e-06, |
| "loss": 0.0165, |
| "step": 17820 |
| }, |
| { |
| "epoch": 12.31353591160221, |
| "grad_norm": 0.15664106607437134, |
| "learning_rate": 3.1870116119775917e-06, |
| "loss": 0.0201, |
| "step": 17830 |
| }, |
| { |
| "epoch": 12.320441988950277, |
| "grad_norm": 0.20320770144462585, |
| "learning_rate": 3.158031719353999e-06, |
| "loss": 0.0205, |
| "step": 17840 |
| }, |
| { |
| "epoch": 12.327348066298342, |
| "grad_norm": 0.1687484085559845, |
| "learning_rate": 3.1291798908830273e-06, |
| "loss": 0.0277, |
| "step": 17850 |
| }, |
| { |
| "epoch": 12.334254143646408, |
| "grad_norm": 0.1131737232208252, |
| "learning_rate": 3.1004562054444853e-06, |
| "loss": 0.0183, |
| "step": 17860 |
| }, |
| { |
| "epoch": 12.341160220994475, |
| "grad_norm": 0.16560691595077515, |
| "learning_rate": 3.071860741567806e-06, |
| "loss": 0.0184, |
| "step": 17870 |
| }, |
| { |
| "epoch": 12.348066298342541, |
| "grad_norm": 0.17285485565662384, |
| "learning_rate": 3.04339357743193e-06, |
| "loss": 0.0189, |
| "step": 17880 |
| }, |
| { |
| "epoch": 12.354972375690608, |
| "grad_norm": 0.16125869750976562, |
| "learning_rate": 3.0150547908649628e-06, |
| "loss": 0.019, |
| "step": 17890 |
| }, |
| { |
| "epoch": 12.361878453038674, |
| "grad_norm": 0.1124703586101532, |
| "learning_rate": 2.9868444593440957e-06, |
| "loss": 0.0184, |
| "step": 17900 |
| }, |
| { |
| "epoch": 12.36878453038674, |
| "grad_norm": 0.15693354606628418, |
| "learning_rate": 2.9587626599952846e-06, |
| "loss": 0.0149, |
| "step": 17910 |
| }, |
| { |
| "epoch": 12.375690607734807, |
| "grad_norm": 0.15678930282592773, |
| "learning_rate": 2.930809469593082e-06, |
| "loss": 0.0215, |
| "step": 17920 |
| }, |
| { |
| "epoch": 12.382596685082873, |
| "grad_norm": 0.13219571113586426, |
| "learning_rate": 2.9029849645604733e-06, |
| "loss": 0.0214, |
| "step": 17930 |
| }, |
| { |
| "epoch": 12.38950276243094, |
| "grad_norm": 0.14746655523777008, |
| "learning_rate": 2.8752892209685632e-06, |
| "loss": 0.0191, |
| "step": 17940 |
| }, |
| { |
| "epoch": 12.396408839779006, |
| "grad_norm": 0.12485410273075104, |
| "learning_rate": 2.847722314536483e-06, |
| "loss": 0.0185, |
| "step": 17950 |
| }, |
| { |
| "epoch": 12.403314917127071, |
| "grad_norm": 0.08592341095209122, |
| "learning_rate": 2.820284320631078e-06, |
| "loss": 0.0179, |
| "step": 17960 |
| }, |
| { |
| "epoch": 12.410220994475138, |
| "grad_norm": 0.1952495276927948, |
| "learning_rate": 2.792975314266788e-06, |
| "loss": 0.0234, |
| "step": 17970 |
| }, |
| { |
| "epoch": 12.417127071823204, |
| "grad_norm": 0.13520370423793793, |
| "learning_rate": 2.7657953701054007e-06, |
| "loss": 0.0202, |
| "step": 17980 |
| }, |
| { |
| "epoch": 12.42403314917127, |
| "grad_norm": 0.17327278852462769, |
| "learning_rate": 2.7387445624558306e-06, |
| "loss": 0.0201, |
| "step": 17990 |
| }, |
| { |
| "epoch": 12.430939226519337, |
| "grad_norm": 0.09471528232097626, |
| "learning_rate": 2.7118229652739747e-06, |
| "loss": 0.0185, |
| "step": 18000 |
| }, |
| { |
| "epoch": 12.437845303867404, |
| "grad_norm": 0.10891865938901901, |
| "learning_rate": 2.6850306521624236e-06, |
| "loss": 0.0218, |
| "step": 18010 |
| }, |
| { |
| "epoch": 12.44475138121547, |
| "grad_norm": 0.1492462456226349, |
| "learning_rate": 2.6583676963703507e-06, |
| "loss": 0.0221, |
| "step": 18020 |
| }, |
| { |
| "epoch": 12.451657458563536, |
| "grad_norm": 0.14195561408996582, |
| "learning_rate": 2.631834170793268e-06, |
| "loss": 0.0209, |
| "step": 18030 |
| }, |
| { |
| "epoch": 12.458563535911603, |
| "grad_norm": 0.11009178310632706, |
| "learning_rate": 2.6054301479728036e-06, |
| "loss": 0.0165, |
| "step": 18040 |
| }, |
| { |
| "epoch": 12.465469613259668, |
| "grad_norm": 0.12649135291576385, |
| "learning_rate": 2.579155700096575e-06, |
| "loss": 0.0212, |
| "step": 18050 |
| }, |
| { |
| "epoch": 12.472375690607734, |
| "grad_norm": 0.16060461103916168, |
| "learning_rate": 2.5530108989978873e-06, |
| "loss": 0.0217, |
| "step": 18060 |
| }, |
| { |
| "epoch": 12.4792817679558, |
| "grad_norm": 0.12160547077655792, |
| "learning_rate": 2.5269958161556416e-06, |
| "loss": 0.0165, |
| "step": 18070 |
| }, |
| { |
| "epoch": 12.486187845303867, |
| "grad_norm": 0.16475243866443634, |
| "learning_rate": 2.5011105226940888e-06, |
| "loss": 0.0145, |
| "step": 18080 |
| }, |
| { |
| "epoch": 12.493093922651934, |
| "grad_norm": 0.0938870906829834, |
| "learning_rate": 2.4753550893826248e-06, |
| "loss": 0.0179, |
| "step": 18090 |
| }, |
| { |
| "epoch": 12.5, |
| "grad_norm": 0.11797482520341873, |
| "learning_rate": 2.4497295866356296e-06, |
| "loss": 0.0199, |
| "step": 18100 |
| }, |
| { |
| "epoch": 12.506906077348066, |
| "grad_norm": 0.08225338906049728, |
| "learning_rate": 2.424234084512228e-06, |
| "loss": 0.0188, |
| "step": 18110 |
| }, |
| { |
| "epoch": 12.513812154696133, |
| "grad_norm": 0.14476081728935242, |
| "learning_rate": 2.3988686527161687e-06, |
| "loss": 0.0177, |
| "step": 18120 |
| }, |
| { |
| "epoch": 12.5207182320442, |
| "grad_norm": 0.15867789089679718, |
| "learning_rate": 2.373633360595573e-06, |
| "loss": 0.0168, |
| "step": 18130 |
| }, |
| { |
| "epoch": 12.527624309392266, |
| "grad_norm": 0.15915602445602417, |
| "learning_rate": 2.3485282771427585e-06, |
| "loss": 0.0187, |
| "step": 18140 |
| }, |
| { |
| "epoch": 12.534530386740332, |
| "grad_norm": 0.11553303152322769, |
| "learning_rate": 2.3235534709940665e-06, |
| "loss": 0.0191, |
| "step": 18150 |
| }, |
| { |
| "epoch": 12.541436464088397, |
| "grad_norm": 0.1287674605846405, |
| "learning_rate": 2.2987090104296617e-06, |
| "loss": 0.0169, |
| "step": 18160 |
| }, |
| { |
| "epoch": 12.548342541436464, |
| "grad_norm": 0.14101791381835938, |
| "learning_rate": 2.273994963373355e-06, |
| "loss": 0.0179, |
| "step": 18170 |
| }, |
| { |
| "epoch": 12.55524861878453, |
| "grad_norm": 0.2093939483165741, |
| "learning_rate": 2.249411397392409e-06, |
| "loss": 0.0206, |
| "step": 18180 |
| }, |
| { |
| "epoch": 12.562154696132596, |
| "grad_norm": 0.10685942322015762, |
| "learning_rate": 2.2249583796973506e-06, |
| "loss": 0.0208, |
| "step": 18190 |
| }, |
| { |
| "epoch": 12.569060773480663, |
| "grad_norm": 0.12277834117412567, |
| "learning_rate": 2.200635977141796e-06, |
| "loss": 0.0176, |
| "step": 18200 |
| }, |
| { |
| "epoch": 12.57596685082873, |
| "grad_norm": 0.12654268741607666, |
| "learning_rate": 2.17644425622226e-06, |
| "loss": 0.0152, |
| "step": 18210 |
| }, |
| { |
| "epoch": 12.582872928176796, |
| "grad_norm": 0.15644238889217377, |
| "learning_rate": 2.152383283077991e-06, |
| "loss": 0.0174, |
| "step": 18220 |
| }, |
| { |
| "epoch": 12.589779005524862, |
| "grad_norm": 0.21075870096683502, |
| "learning_rate": 2.128453123490781e-06, |
| "loss": 0.0198, |
| "step": 18230 |
| }, |
| { |
| "epoch": 12.596685082872929, |
| "grad_norm": 0.1122535914182663, |
| "learning_rate": 2.1046538428847462e-06, |
| "loss": 0.0194, |
| "step": 18240 |
| }, |
| { |
| "epoch": 12.603591160220994, |
| "grad_norm": 0.12407967448234558, |
| "learning_rate": 2.0809855063262273e-06, |
| "loss": 0.0192, |
| "step": 18250 |
| }, |
| { |
| "epoch": 12.61049723756906, |
| "grad_norm": 0.18401677906513214, |
| "learning_rate": 2.057448178523558e-06, |
| "loss": 0.0204, |
| "step": 18260 |
| }, |
| { |
| "epoch": 12.617403314917127, |
| "grad_norm": 0.10147448629140854, |
| "learning_rate": 2.034041923826885e-06, |
| "loss": 0.0185, |
| "step": 18270 |
| }, |
| { |
| "epoch": 12.624309392265193, |
| "grad_norm": 0.14004665613174438, |
| "learning_rate": 2.0107668062280204e-06, |
| "loss": 0.0179, |
| "step": 18280 |
| }, |
| { |
| "epoch": 12.63121546961326, |
| "grad_norm": 0.16008542478084564, |
| "learning_rate": 1.9876228893602357e-06, |
| "loss": 0.0177, |
| "step": 18290 |
| }, |
| { |
| "epoch": 12.638121546961326, |
| "grad_norm": 0.08414597809314728, |
| "learning_rate": 1.9646102364981266e-06, |
| "loss": 0.0222, |
| "step": 18300 |
| }, |
| { |
| "epoch": 12.645027624309392, |
| "grad_norm": 0.11705183237791061, |
| "learning_rate": 1.9417289105574053e-06, |
| "loss": 0.0197, |
| "step": 18310 |
| }, |
| { |
| "epoch": 12.651933701657459, |
| "grad_norm": 0.11046871542930603, |
| "learning_rate": 1.9189789740947427e-06, |
| "loss": 0.0169, |
| "step": 18320 |
| }, |
| { |
| "epoch": 12.658839779005525, |
| "grad_norm": 0.11994214355945587, |
| "learning_rate": 1.896360489307597e-06, |
| "loss": 0.0183, |
| "step": 18330 |
| }, |
| { |
| "epoch": 12.665745856353592, |
| "grad_norm": 0.13720695674419403, |
| "learning_rate": 1.8738735180340362e-06, |
| "loss": 0.0184, |
| "step": 18340 |
| }, |
| { |
| "epoch": 12.672651933701658, |
| "grad_norm": 0.14560860395431519, |
| "learning_rate": 1.8515181217525824e-06, |
| "loss": 0.0185, |
| "step": 18350 |
| }, |
| { |
| "epoch": 12.679558011049723, |
| "grad_norm": 0.12190794199705124, |
| "learning_rate": 1.8292943615820457e-06, |
| "loss": 0.0161, |
| "step": 18360 |
| }, |
| { |
| "epoch": 12.68646408839779, |
| "grad_norm": 0.18701349198818207, |
| "learning_rate": 1.8072022982813296e-06, |
| "loss": 0.0219, |
| "step": 18370 |
| }, |
| { |
| "epoch": 12.693370165745856, |
| "grad_norm": 0.11787126213312149, |
| "learning_rate": 1.7852419922492925e-06, |
| "loss": 0.0209, |
| "step": 18380 |
| }, |
| { |
| "epoch": 12.700276243093922, |
| "grad_norm": 0.12787750363349915, |
| "learning_rate": 1.763413503524569e-06, |
| "loss": 0.0195, |
| "step": 18390 |
| }, |
| { |
| "epoch": 12.707182320441989, |
| "grad_norm": 0.16412900388240814, |
| "learning_rate": 1.7417168917854165e-06, |
| "loss": 0.0235, |
| "step": 18400 |
| }, |
| { |
| "epoch": 12.714088397790055, |
| "grad_norm": 0.1265489161014557, |
| "learning_rate": 1.720152216349552e-06, |
| "loss": 0.0208, |
| "step": 18410 |
| }, |
| { |
| "epoch": 12.720994475138122, |
| "grad_norm": 0.16048400104045868, |
| "learning_rate": 1.6987195361739595e-06, |
| "loss": 0.0225, |
| "step": 18420 |
| }, |
| { |
| "epoch": 12.727900552486188, |
| "grad_norm": 0.14972177147865295, |
| "learning_rate": 1.6774189098547832e-06, |
| "loss": 0.018, |
| "step": 18430 |
| }, |
| { |
| "epoch": 12.734806629834255, |
| "grad_norm": 0.14532731473445892, |
| "learning_rate": 1.6562503956271069e-06, |
| "loss": 0.0162, |
| "step": 18440 |
| }, |
| { |
| "epoch": 12.74171270718232, |
| "grad_norm": 0.13556219637393951, |
| "learning_rate": 1.6352140513648417e-06, |
| "loss": 0.0217, |
| "step": 18450 |
| }, |
| { |
| "epoch": 12.748618784530386, |
| "grad_norm": 0.12861819565296173, |
| "learning_rate": 1.6143099345805712e-06, |
| "loss": 0.0186, |
| "step": 18460 |
| }, |
| { |
| "epoch": 12.755524861878452, |
| "grad_norm": 0.1650349497795105, |
| "learning_rate": 1.5935381024253293e-06, |
| "loss": 0.0201, |
| "step": 18470 |
| }, |
| { |
| "epoch": 12.762430939226519, |
| "grad_norm": 0.11116552352905273, |
| "learning_rate": 1.572898611688517e-06, |
| "loss": 0.0146, |
| "step": 18480 |
| }, |
| { |
| "epoch": 12.769337016574585, |
| "grad_norm": 0.13286760449409485, |
| "learning_rate": 1.5523915187977133e-06, |
| "loss": 0.0196, |
| "step": 18490 |
| }, |
| { |
| "epoch": 12.776243093922652, |
| "grad_norm": 0.15505313873291016, |
| "learning_rate": 1.532016879818532e-06, |
| "loss": 0.0186, |
| "step": 18500 |
| }, |
| { |
| "epoch": 12.783149171270718, |
| "grad_norm": 0.1274903118610382, |
| "learning_rate": 1.51177475045447e-06, |
| "loss": 0.0185, |
| "step": 18510 |
| }, |
| { |
| "epoch": 12.790055248618785, |
| "grad_norm": 0.14875926077365875, |
| "learning_rate": 1.4916651860467035e-06, |
| "loss": 0.0224, |
| "step": 18520 |
| }, |
| { |
| "epoch": 12.796961325966851, |
| "grad_norm": 0.20385435223579407, |
| "learning_rate": 1.471688241574043e-06, |
| "loss": 0.019, |
| "step": 18530 |
| }, |
| { |
| "epoch": 12.803867403314918, |
| "grad_norm": 0.19278424978256226, |
| "learning_rate": 1.451843971652672e-06, |
| "loss": 0.0249, |
| "step": 18540 |
| }, |
| { |
| "epoch": 12.810773480662984, |
| "grad_norm": 0.16716869175434113, |
| "learning_rate": 1.432132430536076e-06, |
| "loss": 0.0207, |
| "step": 18550 |
| }, |
| { |
| "epoch": 12.817679558011049, |
| "grad_norm": 0.1731618344783783, |
| "learning_rate": 1.412553672114869e-06, |
| "loss": 0.0194, |
| "step": 18560 |
| }, |
| { |
| "epoch": 12.824585635359115, |
| "grad_norm": 0.11455850303173065, |
| "learning_rate": 1.3931077499166056e-06, |
| "loss": 0.0196, |
| "step": 18570 |
| }, |
| { |
| "epoch": 12.831491712707182, |
| "grad_norm": 0.18242445588111877, |
| "learning_rate": 1.3737947171057085e-06, |
| "loss": 0.0171, |
| "step": 18580 |
| }, |
| { |
| "epoch": 12.838397790055248, |
| "grad_norm": 0.12524786591529846, |
| "learning_rate": 1.3546146264832582e-06, |
| "loss": 0.0191, |
| "step": 18590 |
| }, |
| { |
| "epoch": 12.845303867403315, |
| "grad_norm": 0.13134995102882385, |
| "learning_rate": 1.3355675304869086e-06, |
| "loss": 0.0213, |
| "step": 18600 |
| }, |
| { |
| "epoch": 12.852209944751381, |
| "grad_norm": 0.1714327186346054, |
| "learning_rate": 1.3166534811906827e-06, |
| "loss": 0.0208, |
| "step": 18610 |
| }, |
| { |
| "epoch": 12.859116022099448, |
| "grad_norm": 0.10196040570735931, |
| "learning_rate": 1.2978725303048666e-06, |
| "loss": 0.0164, |
| "step": 18620 |
| }, |
| { |
| "epoch": 12.866022099447514, |
| "grad_norm": 0.12005294859409332, |
| "learning_rate": 1.2792247291758762e-06, |
| "loss": 0.0185, |
| "step": 18630 |
| }, |
| { |
| "epoch": 12.87292817679558, |
| "grad_norm": 0.15142318606376648, |
| "learning_rate": 1.2607101287860635e-06, |
| "loss": 0.016, |
| "step": 18640 |
| }, |
| { |
| "epoch": 12.879834254143645, |
| "grad_norm": 0.1705733835697174, |
| "learning_rate": 1.2423287797536654e-06, |
| "loss": 0.0192, |
| "step": 18650 |
| }, |
| { |
| "epoch": 12.886740331491712, |
| "grad_norm": 0.13848072290420532, |
| "learning_rate": 1.2240807323325776e-06, |
| "loss": 0.0171, |
| "step": 18660 |
| }, |
| { |
| "epoch": 12.893646408839778, |
| "grad_norm": 0.17544282972812653, |
| "learning_rate": 1.205966036412254e-06, |
| "loss": 0.0215, |
| "step": 18670 |
| }, |
| { |
| "epoch": 12.900552486187845, |
| "grad_norm": 0.1576301008462906, |
| "learning_rate": 1.1879847415175949e-06, |
| "loss": 0.0258, |
| "step": 18680 |
| }, |
| { |
| "epoch": 12.907458563535911, |
| "grad_norm": 0.13134483993053436, |
| "learning_rate": 1.1701368968087712e-06, |
| "loss": 0.0171, |
| "step": 18690 |
| }, |
| { |
| "epoch": 12.914364640883978, |
| "grad_norm": 0.1861048936843872, |
| "learning_rate": 1.1524225510811116e-06, |
| "loss": 0.0216, |
| "step": 18700 |
| }, |
| { |
| "epoch": 12.921270718232044, |
| "grad_norm": 0.12704524397850037, |
| "learning_rate": 1.1348417527649535e-06, |
| "loss": 0.0207, |
| "step": 18710 |
| }, |
| { |
| "epoch": 12.92817679558011, |
| "grad_norm": 0.10087218880653381, |
| "learning_rate": 1.1173945499255268e-06, |
| "loss": 0.0166, |
| "step": 18720 |
| }, |
| { |
| "epoch": 12.935082872928177, |
| "grad_norm": 0.16379135847091675, |
| "learning_rate": 1.1000809902628307e-06, |
| "loss": 0.017, |
| "step": 18730 |
| }, |
| { |
| "epoch": 12.941988950276244, |
| "grad_norm": 0.12458409368991852, |
| "learning_rate": 1.082901121111468e-06, |
| "loss": 0.0183, |
| "step": 18740 |
| }, |
| { |
| "epoch": 12.94889502762431, |
| "grad_norm": 0.18087325990200043, |
| "learning_rate": 1.0658549894405456e-06, |
| "loss": 0.0168, |
| "step": 18750 |
| }, |
| { |
| "epoch": 12.955801104972375, |
| "grad_norm": 0.15164682269096375, |
| "learning_rate": 1.0489426418535342e-06, |
| "loss": 0.0187, |
| "step": 18760 |
| }, |
| { |
| "epoch": 12.962707182320441, |
| "grad_norm": 0.15599878132343292, |
| "learning_rate": 1.0321641245881474e-06, |
| "loss": 0.0175, |
| "step": 18770 |
| }, |
| { |
| "epoch": 12.969613259668508, |
| "grad_norm": 0.08068261295557022, |
| "learning_rate": 1.015519483516214e-06, |
| "loss": 0.0154, |
| "step": 18780 |
| }, |
| { |
| "epoch": 12.976519337016574, |
| "grad_norm": 0.14306342601776123, |
| "learning_rate": 9.990087641435443e-07, |
| "loss": 0.0202, |
| "step": 18790 |
| }, |
| { |
| "epoch": 12.98342541436464, |
| "grad_norm": 0.1224728599190712, |
| "learning_rate": 9.826320116098132e-07, |
| "loss": 0.019, |
| "step": 18800 |
| }, |
| { |
| "epoch": 12.990331491712707, |
| "grad_norm": 0.1325245350599289, |
| "learning_rate": 9.663892706884447e-07, |
| "loss": 0.0196, |
| "step": 18810 |
| }, |
| { |
| "epoch": 12.997237569060774, |
| "grad_norm": 0.12311689555644989, |
| "learning_rate": 9.502805857864616e-07, |
| "loss": 0.0162, |
| "step": 18820 |
| }, |
| { |
| "epoch": 13.00414364640884, |
| "grad_norm": 0.22265133261680603, |
| "learning_rate": 9.34306000944396e-07, |
| "loss": 0.0196, |
| "step": 18830 |
| }, |
| { |
| "epoch": 13.011049723756907, |
| "grad_norm": 0.11388003826141357, |
| "learning_rate": 9.184655598361624e-07, |
| "loss": 0.0151, |
| "step": 18840 |
| }, |
| { |
| "epoch": 13.017955801104973, |
| "grad_norm": 0.11467894166707993, |
| "learning_rate": 9.027593057689076e-07, |
| "loss": 0.0162, |
| "step": 18850 |
| }, |
| { |
| "epoch": 13.024861878453038, |
| "grad_norm": 0.17213301360607147, |
| "learning_rate": 8.871872816829441e-07, |
| "loss": 0.0164, |
| "step": 18860 |
| }, |
| { |
| "epoch": 13.031767955801104, |
| "grad_norm": 0.10395052284002304, |
| "learning_rate": 8.717495301515777e-07, |
| "loss": 0.0162, |
| "step": 18870 |
| }, |
| { |
| "epoch": 13.03867403314917, |
| "grad_norm": 0.14414413273334503, |
| "learning_rate": 8.564460933810415e-07, |
| "loss": 0.0185, |
| "step": 18880 |
| }, |
| { |
| "epoch": 13.045580110497237, |
| "grad_norm": 0.14760589599609375, |
| "learning_rate": 8.412770132103453e-07, |
| "loss": 0.0179, |
| "step": 18890 |
| }, |
| { |
| "epoch": 13.052486187845304, |
| "grad_norm": 0.14571161568164825, |
| "learning_rate": 8.262423311111711e-07, |
| "loss": 0.017, |
| "step": 18900 |
| }, |
| { |
| "epoch": 13.05939226519337, |
| "grad_norm": 0.1314871907234192, |
| "learning_rate": 8.113420881877665e-07, |
| "loss": 0.0184, |
| "step": 18910 |
| }, |
| { |
| "epoch": 13.066298342541437, |
| "grad_norm": 0.13378721475601196, |
| "learning_rate": 7.965763251768288e-07, |
| "loss": 0.0198, |
| "step": 18920 |
| }, |
| { |
| "epoch": 13.073204419889503, |
| "grad_norm": 0.16803504526615143, |
| "learning_rate": 7.819450824473995e-07, |
| "loss": 0.0212, |
| "step": 18930 |
| }, |
| { |
| "epoch": 13.08011049723757, |
| "grad_norm": 0.12649837136268616, |
| "learning_rate": 7.674484000007198e-07, |
| "loss": 0.0189, |
| "step": 18940 |
| }, |
| { |
| "epoch": 13.087016574585636, |
| "grad_norm": 0.13606299459934235, |
| "learning_rate": 7.530863174701752e-07, |
| "loss": 0.0178, |
| "step": 18950 |
| }, |
| { |
| "epoch": 13.0939226519337, |
| "grad_norm": 0.20246635377407074, |
| "learning_rate": 7.38858874121151e-07, |
| "loss": 0.0196, |
| "step": 18960 |
| }, |
| { |
| "epoch": 13.100828729281767, |
| "grad_norm": 0.10595327615737915, |
| "learning_rate": 7.247661088509328e-07, |
| "loss": 0.0189, |
| "step": 18970 |
| }, |
| { |
| "epoch": 13.107734806629834, |
| "grad_norm": 0.20289529860019684, |
| "learning_rate": 7.108080601886002e-07, |
| "loss": 0.0185, |
| "step": 18980 |
| }, |
| { |
| "epoch": 13.1146408839779, |
| "grad_norm": 0.16404929757118225, |
| "learning_rate": 6.969847662949336e-07, |
| "loss": 0.0158, |
| "step": 18990 |
| }, |
| { |
| "epoch": 13.121546961325967, |
| "grad_norm": 0.09362056851387024, |
| "learning_rate": 6.832962649622798e-07, |
| "loss": 0.0164, |
| "step": 19000 |
| }, |
| { |
| "epoch": 13.128453038674033, |
| "grad_norm": 0.09150226414203644, |
| "learning_rate": 6.697425936144863e-07, |
| "loss": 0.0161, |
| "step": 19010 |
| }, |
| { |
| "epoch": 13.1353591160221, |
| "grad_norm": 0.1991247683763504, |
| "learning_rate": 6.563237893067731e-07, |
| "loss": 0.0201, |
| "step": 19020 |
| }, |
| { |
| "epoch": 13.142265193370166, |
| "grad_norm": 0.1374427080154419, |
| "learning_rate": 6.430398887256328e-07, |
| "loss": 0.02, |
| "step": 19030 |
| }, |
| { |
| "epoch": 13.149171270718233, |
| "grad_norm": 0.200238436460495, |
| "learning_rate": 6.298909281887478e-07, |
| "loss": 0.0177, |
| "step": 19040 |
| }, |
| { |
| "epoch": 13.1560773480663, |
| "grad_norm": 0.14022763073444366, |
| "learning_rate": 6.168769436448673e-07, |
| "loss": 0.0177, |
| "step": 19050 |
| }, |
| { |
| "epoch": 13.162983425414364, |
| "grad_norm": 0.13676732778549194, |
| "learning_rate": 6.03997970673742e-07, |
| "loss": 0.0156, |
| "step": 19060 |
| }, |
| { |
| "epoch": 13.16988950276243, |
| "grad_norm": 0.12980788946151733, |
| "learning_rate": 5.912540444859782e-07, |
| "loss": 0.0182, |
| "step": 19070 |
| }, |
| { |
| "epoch": 13.176795580110497, |
| "grad_norm": 0.1635156124830246, |
| "learning_rate": 5.786451999229837e-07, |
| "loss": 0.0168, |
| "step": 19080 |
| }, |
| { |
| "epoch": 13.183701657458563, |
| "grad_norm": 0.1873101443052292, |
| "learning_rate": 5.661714714568722e-07, |
| "loss": 0.0174, |
| "step": 19090 |
| }, |
| { |
| "epoch": 13.19060773480663, |
| "grad_norm": 0.10086224973201752, |
| "learning_rate": 5.538328931903259e-07, |
| "loss": 0.0153, |
| "step": 19100 |
| }, |
| { |
| "epoch": 13.197513812154696, |
| "grad_norm": 0.12298750132322311, |
| "learning_rate": 5.416294988565551e-07, |
| "loss": 0.0207, |
| "step": 19110 |
| }, |
| { |
| "epoch": 13.204419889502763, |
| "grad_norm": 0.12365196645259857, |
| "learning_rate": 5.29561321819172e-07, |
| "loss": 0.0168, |
| "step": 19120 |
| }, |
| { |
| "epoch": 13.21132596685083, |
| "grad_norm": 0.10630401968955994, |
| "learning_rate": 5.176283950721061e-07, |
| "loss": 0.0172, |
| "step": 19130 |
| }, |
| { |
| "epoch": 13.218232044198896, |
| "grad_norm": 0.11978129297494888, |
| "learning_rate": 5.058307512395332e-07, |
| "loss": 0.0193, |
| "step": 19140 |
| }, |
| { |
| "epoch": 13.225138121546962, |
| "grad_norm": 0.11279390752315521, |
| "learning_rate": 4.941684225757526e-07, |
| "loss": 0.0184, |
| "step": 19150 |
| }, |
| { |
| "epoch": 13.232044198895027, |
| "grad_norm": 0.0903186947107315, |
| "learning_rate": 4.826414409651314e-07, |
| "loss": 0.0206, |
| "step": 19160 |
| }, |
| { |
| "epoch": 13.238950276243093, |
| "grad_norm": 0.15330806374549866, |
| "learning_rate": 4.712498379219943e-07, |
| "loss": 0.0202, |
| "step": 19170 |
| }, |
| { |
| "epoch": 13.24585635359116, |
| "grad_norm": 0.10040513426065445, |
| "learning_rate": 4.599936445905506e-07, |
| "loss": 0.0176, |
| "step": 19180 |
| }, |
| { |
| "epoch": 13.252762430939226, |
| "grad_norm": 0.14349868893623352, |
| "learning_rate": 4.4887289174480594e-07, |
| "loss": 0.0179, |
| "step": 19190 |
| }, |
| { |
| "epoch": 13.259668508287293, |
| "grad_norm": 0.14012572169303894, |
| "learning_rate": 4.378876097884621e-07, |
| "loss": 0.0196, |
| "step": 19200 |
| }, |
| { |
| "epoch": 13.26657458563536, |
| "grad_norm": 0.09931646287441254, |
| "learning_rate": 4.2703782875487264e-07, |
| "loss": 0.0158, |
| "step": 19210 |
| }, |
| { |
| "epoch": 13.273480662983426, |
| "grad_norm": 0.13279908895492554, |
| "learning_rate": 4.163235783069208e-07, |
| "loss": 0.0188, |
| "step": 19220 |
| }, |
| { |
| "epoch": 13.280386740331492, |
| "grad_norm": 0.15899476408958435, |
| "learning_rate": 4.057448877369585e-07, |
| "loss": 0.0192, |
| "step": 19230 |
| }, |
| { |
| "epoch": 13.287292817679559, |
| "grad_norm": 0.1579292118549347, |
| "learning_rate": 3.9530178596672295e-07, |
| "loss": 0.0159, |
| "step": 19240 |
| }, |
| { |
| "epoch": 13.294198895027625, |
| "grad_norm": 0.15862807631492615, |
| "learning_rate": 3.849943015472479e-07, |
| "loss": 0.0206, |
| "step": 19250 |
| }, |
| { |
| "epoch": 13.30110497237569, |
| "grad_norm": 0.09818954765796661, |
| "learning_rate": 3.748224626588137e-07, |
| "loss": 0.018, |
| "step": 19260 |
| }, |
| { |
| "epoch": 13.308011049723756, |
| "grad_norm": 0.1983523666858673, |
| "learning_rate": 3.647862971108307e-07, |
| "loss": 0.0163, |
| "step": 19270 |
| }, |
| { |
| "epoch": 13.314917127071823, |
| "grad_norm": 0.1573604941368103, |
| "learning_rate": 3.5488583234179473e-07, |
| "loss": 0.0191, |
| "step": 19280 |
| }, |
| { |
| "epoch": 13.32182320441989, |
| "grad_norm": 0.08943048864603043, |
| "learning_rate": 3.4512109541920413e-07, |
| "loss": 0.0204, |
| "step": 19290 |
| }, |
| { |
| "epoch": 13.328729281767956, |
| "grad_norm": 0.09052513539791107, |
| "learning_rate": 3.354921130394706e-07, |
| "loss": 0.0188, |
| "step": 19300 |
| }, |
| { |
| "epoch": 13.335635359116022, |
| "grad_norm": 0.14040173590183258, |
| "learning_rate": 3.259989115278639e-07, |
| "loss": 0.0168, |
| "step": 19310 |
| }, |
| { |
| "epoch": 13.342541436464089, |
| "grad_norm": 0.12269480526447296, |
| "learning_rate": 3.1664151683843403e-07, |
| "loss": 0.0168, |
| "step": 19320 |
| }, |
| { |
| "epoch": 13.349447513812155, |
| "grad_norm": 0.14032500982284546, |
| "learning_rate": 3.074199545539447e-07, |
| "loss": 0.0155, |
| "step": 19330 |
| }, |
| { |
| "epoch": 13.356353591160222, |
| "grad_norm": 0.1279202252626419, |
| "learning_rate": 2.983342498857955e-07, |
| "loss": 0.0197, |
| "step": 19340 |
| }, |
| { |
| "epoch": 13.363259668508288, |
| "grad_norm": 0.12618181109428406, |
| "learning_rate": 2.893844276739499e-07, |
| "loss": 0.0153, |
| "step": 19350 |
| }, |
| { |
| "epoch": 13.370165745856353, |
| "grad_norm": 0.12394572794437408, |
| "learning_rate": 2.8057051238688514e-07, |
| "loss": 0.0196, |
| "step": 19360 |
| }, |
| { |
| "epoch": 13.37707182320442, |
| "grad_norm": 0.09562724083662033, |
| "learning_rate": 2.71892528121509e-07, |
| "loss": 0.0157, |
| "step": 19370 |
| }, |
| { |
| "epoch": 13.383977900552486, |
| "grad_norm": 0.17502541840076447, |
| "learning_rate": 2.633504986030988e-07, |
| "loss": 0.0215, |
| "step": 19380 |
| }, |
| { |
| "epoch": 13.390883977900552, |
| "grad_norm": 0.16975905001163483, |
| "learning_rate": 2.549444471852347e-07, |
| "loss": 0.0159, |
| "step": 19390 |
| }, |
| { |
| "epoch": 13.397790055248619, |
| "grad_norm": 0.14912907779216766, |
| "learning_rate": 2.4667439684974423e-07, |
| "loss": 0.0195, |
| "step": 19400 |
| }, |
| { |
| "epoch": 13.404696132596685, |
| "grad_norm": 0.13770301640033722, |
| "learning_rate": 2.3854037020662467e-07, |
| "loss": 0.0183, |
| "step": 19410 |
| }, |
| { |
| "epoch": 13.411602209944752, |
| "grad_norm": 0.11400759220123291, |
| "learning_rate": 2.3054238949399288e-07, |
| "loss": 0.0208, |
| "step": 19420 |
| }, |
| { |
| "epoch": 13.418508287292818, |
| "grad_norm": 0.1524895429611206, |
| "learning_rate": 2.2268047657802993e-07, |
| "loss": 0.0164, |
| "step": 19430 |
| }, |
| { |
| "epoch": 13.425414364640885, |
| "grad_norm": 0.18355759978294373, |
| "learning_rate": 2.149546529529034e-07, |
| "loss": 0.0178, |
| "step": 19440 |
| }, |
| { |
| "epoch": 13.432320441988951, |
| "grad_norm": 0.1252148151397705, |
| "learning_rate": 2.0736493974071736e-07, |
| "loss": 0.021, |
| "step": 19450 |
| }, |
| { |
| "epoch": 13.439226519337016, |
| "grad_norm": 0.10146128386259079, |
| "learning_rate": 1.9991135769145686e-07, |
| "loss": 0.0129, |
| "step": 19460 |
| }, |
| { |
| "epoch": 13.446132596685082, |
| "grad_norm": 0.13486264646053314, |
| "learning_rate": 1.9259392718293245e-07, |
| "loss": 0.0182, |
| "step": 19470 |
| }, |
| { |
| "epoch": 13.453038674033149, |
| "grad_norm": 0.14005959033966064, |
| "learning_rate": 1.8541266822072467e-07, |
| "loss": 0.0211, |
| "step": 19480 |
| }, |
| { |
| "epoch": 13.459944751381215, |
| "grad_norm": 0.1532914638519287, |
| "learning_rate": 1.7836760043811184e-07, |
| "loss": 0.0231, |
| "step": 19490 |
| }, |
| { |
| "epoch": 13.466850828729282, |
| "grad_norm": 0.05971955880522728, |
| "learning_rate": 1.7145874309604792e-07, |
| "loss": 0.0143, |
| "step": 19500 |
| }, |
| { |
| "epoch": 13.473756906077348, |
| "grad_norm": 0.11946941912174225, |
| "learning_rate": 1.6468611508308474e-07, |
| "loss": 0.0172, |
| "step": 19510 |
| }, |
| { |
| "epoch": 13.480662983425415, |
| "grad_norm": 0.13063247501850128, |
| "learning_rate": 1.5804973491532204e-07, |
| "loss": 0.021, |
| "step": 19520 |
| }, |
| { |
| "epoch": 13.487569060773481, |
| "grad_norm": 0.1444508135318756, |
| "learning_rate": 1.5154962073637424e-07, |
| "loss": 0.017, |
| "step": 19530 |
| }, |
| { |
| "epoch": 13.494475138121548, |
| "grad_norm": 0.13403776288032532, |
| "learning_rate": 1.4518579031730372e-07, |
| "loss": 0.0168, |
| "step": 19540 |
| }, |
| { |
| "epoch": 13.501381215469614, |
| "grad_norm": 0.0963289737701416, |
| "learning_rate": 1.389582610565876e-07, |
| "loss": 0.0163, |
| "step": 19550 |
| }, |
| { |
| "epoch": 13.50828729281768, |
| "grad_norm": 0.07779234647750854, |
| "learning_rate": 1.3286704998003995e-07, |
| "loss": 0.0238, |
| "step": 19560 |
| }, |
| { |
| "epoch": 13.515193370165745, |
| "grad_norm": 0.12145277112722397, |
| "learning_rate": 1.2691217374080632e-07, |
| "loss": 0.0236, |
| "step": 19570 |
| }, |
| { |
| "epoch": 13.522099447513812, |
| "grad_norm": 0.1347256600856781, |
| "learning_rate": 1.2109364861929705e-07, |
| "loss": 0.0193, |
| "step": 19580 |
| }, |
| { |
| "epoch": 13.529005524861878, |
| "grad_norm": 0.15796039998531342, |
| "learning_rate": 1.1541149052312628e-07, |
| "loss": 0.0157, |
| "step": 19590 |
| }, |
| { |
| "epoch": 13.535911602209945, |
| "grad_norm": 0.15612980723381042, |
| "learning_rate": 1.0986571498710074e-07, |
| "loss": 0.0163, |
| "step": 19600 |
| }, |
| { |
| "epoch": 13.542817679558011, |
| "grad_norm": 0.13418418169021606, |
| "learning_rate": 1.0445633717316438e-07, |
| "loss": 0.0167, |
| "step": 19610 |
| }, |
| { |
| "epoch": 13.549723756906078, |
| "grad_norm": 0.21959972381591797, |
| "learning_rate": 9.918337187034277e-08, |
| "loss": 0.0156, |
| "step": 19620 |
| }, |
| { |
| "epoch": 13.556629834254144, |
| "grad_norm": 0.11513973772525787, |
| "learning_rate": 9.404683349472643e-08, |
| "loss": 0.0194, |
| "step": 19630 |
| }, |
| { |
| "epoch": 13.56353591160221, |
| "grad_norm": 0.13620199263095856, |
| "learning_rate": 8.904673608940983e-08, |
| "loss": 0.0164, |
| "step": 19640 |
| }, |
| { |
| "epoch": 13.570441988950277, |
| "grad_norm": 0.15609873831272125, |
| "learning_rate": 8.418309332447471e-08, |
| "loss": 0.0145, |
| "step": 19650 |
| }, |
| { |
| "epoch": 13.577348066298342, |
| "grad_norm": 0.1274515688419342, |
| "learning_rate": 7.945591849692902e-08, |
| "loss": 0.0162, |
| "step": 19660 |
| }, |
| { |
| "epoch": 13.584254143646408, |
| "grad_norm": 0.15051686763763428, |
| "learning_rate": 7.486522453069578e-08, |
| "loss": 0.0232, |
| "step": 19670 |
| }, |
| { |
| "epoch": 13.591160220994475, |
| "grad_norm": 0.17713497579097748, |
| "learning_rate": 7.041102397655208e-08, |
| "loss": 0.0163, |
| "step": 19680 |
| }, |
| { |
| "epoch": 13.598066298342541, |
| "grad_norm": 0.11196551471948624, |
| "learning_rate": 6.609332901210685e-08, |
| "loss": 0.0144, |
| "step": 19690 |
| }, |
| { |
| "epoch": 13.604972375690608, |
| "grad_norm": 0.1326775848865509, |
| "learning_rate": 6.191215144178419e-08, |
| "loss": 0.0188, |
| "step": 19700 |
| }, |
| { |
| "epoch": 13.611878453038674, |
| "grad_norm": 0.19205215573310852, |
| "learning_rate": 5.786750269675678e-08, |
| "loss": 0.0206, |
| "step": 19710 |
| }, |
| { |
| "epoch": 13.61878453038674, |
| "grad_norm": 0.18258792161941528, |
| "learning_rate": 5.395939383494031e-08, |
| "loss": 0.0212, |
| "step": 19720 |
| }, |
| { |
| "epoch": 13.625690607734807, |
| "grad_norm": 0.1334686279296875, |
| "learning_rate": 5.018783554095463e-08, |
| "loss": 0.0199, |
| "step": 19730 |
| }, |
| { |
| "epoch": 13.632596685082873, |
| "grad_norm": 0.08144468069076538, |
| "learning_rate": 4.655283812610156e-08, |
| "loss": 0.0158, |
| "step": 19740 |
| }, |
| { |
| "epoch": 13.63950276243094, |
| "grad_norm": 0.1487710326910019, |
| "learning_rate": 4.305441152831491e-08, |
| "loss": 0.0219, |
| "step": 19750 |
| }, |
| { |
| "epoch": 13.646408839779006, |
| "grad_norm": 0.10869629681110382, |
| "learning_rate": 3.9692565312171584e-08, |
| "loss": 0.0192, |
| "step": 19760 |
| }, |
| { |
| "epoch": 13.653314917127071, |
| "grad_norm": 0.12427249550819397, |
| "learning_rate": 3.6467308668824975e-08, |
| "loss": 0.0183, |
| "step": 19770 |
| }, |
| { |
| "epoch": 13.660220994475138, |
| "grad_norm": 0.1521531194448471, |
| "learning_rate": 3.3378650416004964e-08, |
| "loss": 0.0196, |
| "step": 19780 |
| }, |
| { |
| "epoch": 13.667127071823204, |
| "grad_norm": 0.14662829041481018, |
| "learning_rate": 3.042659899797906e-08, |
| "loss": 0.0215, |
| "step": 19790 |
| }, |
| { |
| "epoch": 13.67403314917127, |
| "grad_norm": 0.12476713210344315, |
| "learning_rate": 2.76111624855524e-08, |
| "loss": 0.0172, |
| "step": 19800 |
| }, |
| { |
| "epoch": 13.680939226519337, |
| "grad_norm": 0.15183356404304504, |
| "learning_rate": 2.4932348576017784e-08, |
| "loss": 0.0182, |
| "step": 19810 |
| }, |
| { |
| "epoch": 13.687845303867404, |
| "grad_norm": 0.1421179175376892, |
| "learning_rate": 2.239016459314458e-08, |
| "loss": 0.0157, |
| "step": 19820 |
| }, |
| { |
| "epoch": 13.69475138121547, |
| "grad_norm": 0.13849793374538422, |
| "learning_rate": 1.9984617487173174e-08, |
| "loss": 0.0213, |
| "step": 19830 |
| }, |
| { |
| "epoch": 13.701657458563536, |
| "grad_norm": 0.12221692502498627, |
| "learning_rate": 1.7715713834776105e-08, |
| "loss": 0.0177, |
| "step": 19840 |
| }, |
| { |
| "epoch": 13.708563535911603, |
| "grad_norm": 0.22249464690685272, |
| "learning_rate": 1.5583459839046964e-08, |
| "loss": 0.0211, |
| "step": 19850 |
| }, |
| { |
| "epoch": 13.715469613259668, |
| "grad_norm": 0.14573796093463898, |
| "learning_rate": 1.3587861329489304e-08, |
| "loss": 0.0241, |
| "step": 19860 |
| }, |
| { |
| "epoch": 13.722375690607734, |
| "grad_norm": 0.1393386274576187, |
| "learning_rate": 1.1728923761994415e-08, |
| "loss": 0.0183, |
| "step": 19870 |
| }, |
| { |
| "epoch": 13.7292817679558, |
| "grad_norm": 0.16851302981376648, |
| "learning_rate": 1.0006652218819135e-08, |
| "loss": 0.0194, |
| "step": 19880 |
| }, |
| { |
| "epoch": 13.736187845303867, |
| "grad_norm": 0.12711794674396515, |
| "learning_rate": 8.421051408596947e-09, |
| "loss": 0.0163, |
| "step": 19890 |
| }, |
| { |
| "epoch": 13.743093922651934, |
| "grad_norm": 0.16962414979934692, |
| "learning_rate": 6.972125666299123e-09, |
| "loss": 0.0194, |
| "step": 19900 |
| }, |
| { |
| "epoch": 13.75, |
| "grad_norm": 0.13897208869457245, |
| "learning_rate": 5.659878953229169e-09, |
| "loss": 0.0163, |
| "step": 19910 |
| }, |
| { |
| "epoch": 13.756906077348066, |
| "grad_norm": 0.1462239772081375, |
| "learning_rate": 4.48431485701728e-09, |
| "loss": 0.0155, |
| "step": 19920 |
| }, |
| { |
| "epoch": 13.763812154696133, |
| "grad_norm": 0.16802477836608887, |
| "learning_rate": 3.4454365916203322e-09, |
| "loss": 0.0153, |
| "step": 19930 |
| }, |
| { |
| "epoch": 13.7707182320442, |
| "grad_norm": 0.1017882451415062, |
| "learning_rate": 2.5432469972830332e-09, |
| "loss": 0.0157, |
| "step": 19940 |
| }, |
| { |
| "epoch": 13.777624309392266, |
| "grad_norm": 0.16575653851032257, |
| "learning_rate": 1.7777485405601203e-09, |
| "loss": 0.0203, |
| "step": 19950 |
| }, |
| { |
| "epoch": 13.784530386740332, |
| "grad_norm": 0.13925835490226746, |
| "learning_rate": 1.1489433142941597e-09, |
| "loss": 0.0181, |
| "step": 19960 |
| }, |
| { |
| "epoch": 13.791436464088397, |
| "grad_norm": 0.13744530081748962, |
| "learning_rate": 6.568330376210963e-10, |
| "loss": 0.0179, |
| "step": 19970 |
| }, |
| { |
| "epoch": 13.798342541436464, |
| "grad_norm": 0.09223198145627975, |
| "learning_rate": 3.0141905594249787e-10, |
| "loss": 0.0155, |
| "step": 19980 |
| }, |
| { |
| "epoch": 13.80524861878453, |
| "grad_norm": 0.11717510968446732, |
| "learning_rate": 8.270234094776008e-11, |
| "loss": 0.0192, |
| "step": 19990 |
| }, |
| { |
| "epoch": 13.812154696132596, |
| "grad_norm": 0.17134608328342438, |
| "learning_rate": 6.834906085551041e-13, |
| "loss": 0.0186, |
| "step": 20000 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 20000, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 14, |
| "save_steps": 1000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 32, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|